rf_netbsdkintf.c revision 1.117.6.3 1 /* $NetBSD: rf_netbsdkintf.c,v 1.117.6.3 2002/07/15 10:35:50 gehenna Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/cdefs.h>
117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.117.6.3 2002/07/15 10:35:50 gehenna Exp $");
118
119 #include <sys/param.h>
120 #include <sys/errno.h>
121 #include <sys/pool.h>
122 #include <sys/queue.h>
123 #include <sys/disk.h>
124 #include <sys/device.h>
125 #include <sys/stat.h>
126 #include <sys/ioctl.h>
127 #include <sys/fcntl.h>
128 #include <sys/systm.h>
129 #include <sys/namei.h>
130 #include <sys/vnode.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include <dev/raidframe/raidframevar.h>
139 #include <dev/raidframe/raidframeio.h>
140 #include "raid.h"
141 #include "opt_raid_autoconfig.h"
142 #include "rf_raid.h"
143 #include "rf_copyback.h"
144 #include "rf_dag.h"
145 #include "rf_dagflags.h"
146 #include "rf_desc.h"
147 #include "rf_diskqueue.h"
148 #include "rf_acctrace.h"
149 #include "rf_etimer.h"
150 #include "rf_general.h"
151 #include "rf_debugMem.h"
152 #include "rf_kintf.h"
153 #include "rf_options.h"
154 #include "rf_driver.h"
155 #include "rf_parityscan.h"
156 #include "rf_debugprint.h"
157 #include "rf_threadstuff.h"
158
159 int rf_kdebug_level = 0;
160
161 #ifdef DEBUG
162 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
163 #else /* DEBUG */
164 #define db1_printf(a) { }
165 #endif /* DEBUG */
166
167 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
168
169 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
170
171 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
172 * spare table */
173 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
174 * installation process */
175
176 /* prototypes */
177 static void KernelWakeupFunc(struct buf * bp);
178 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
179 dev_t dev, RF_SectorNum_t startSect,
180 RF_SectorCount_t numSect, caddr_t buf,
181 void (*cbFunc) (struct buf *), void *cbArg,
182 int logBytesPerSector, struct proc * b_proc);
183 static void raidinit(RF_Raid_t *);
184
185 void raidattach(int);
186
187 dev_type_open(raidopen);
188 dev_type_close(raidclose);
189 dev_type_read(raidread);
190 dev_type_write(raidwrite);
191 dev_type_ioctl(raidioctl);
192 dev_type_strategy(raidstrategy);
193 dev_type_dump(raiddump);
194 dev_type_size(raidsize);
195
196 const struct bdevsw raid_bdevsw = {
197 raidopen, raidclose, raidstrategy, raidioctl,
198 raiddump, raidsize, D_DISK
199 };
200
201 const struct cdevsw raid_cdevsw = {
202 raidopen, raidclose, raidread, raidwrite, raidioctl,
203 nostop, notty, nopoll, nommap, D_DISK
204 };
205
206 /*
207 * Pilfered from ccd.c
208 */
209
210 struct raidbuf {
211 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
212 struct buf *rf_obp; /* ptr. to original I/O buf */
213 int rf_flags; /* misc. flags */
214 RF_DiskQueueData_t *req;/* the request that this was part of.. */
215 };
216
217 /* component buffer pool */
218 struct pool raidframe_cbufpool;
219
220 #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
221 #define RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
222
223 /* XXX Not sure if the following should be replacing the raidPtrs above,
224 or if it should be used in conjunction with that...
225 */
226
227 struct raid_softc {
228 int sc_flags; /* flags */
229 int sc_cflags; /* configuration flags */
230 size_t sc_size; /* size of the raid device */
231 char sc_xname[20]; /* XXX external name */
232 struct disk sc_dkdev; /* generic disk device info */
233 struct buf_queue buf_queue; /* used for the device queue */
234 };
235 /* sc_flags */
236 #define RAIDF_INITED 0x01 /* unit has been initialized */
237 #define RAIDF_WLABEL 0x02 /* label area is writable */
238 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
239 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
240 #define RAIDF_LOCKED 0x80 /* unit is locked */
241
242 #define raidunit(x) DISKUNIT(x)
243 int numraid = 0;
244
245 /*
246 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
247 * Be aware that large numbers can allow the driver to consume a lot of
248 * kernel memory, especially on writes, and in degraded mode reads.
249 *
250 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
251 * a single 64K write will typically require 64K for the old data,
252 * 64K for the old parity, and 64K for the new parity, for a total
253 * of 192K (if the parity buffer is not re-used immediately).
254 * Even it if is used immediately, that's still 128K, which when multiplied
255 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
256 *
257 * Now in degraded mode, for example, a 64K read on the above setup may
258 * require data reconstruction, which will require *all* of the 4 remaining
259 * disks to participate -- 4 * 32K/disk == 128K again.
260 */
261
262 #ifndef RAIDOUTSTANDING
263 #define RAIDOUTSTANDING 6
264 #endif
265
266 #define RAIDLABELDEV(dev) \
267 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
268
269 /* declared here, and made public, for the benefit of KVM stuff.. */
270 struct raid_softc *raid_softc;
271
272 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
273 struct disklabel *);
274 static void raidgetdisklabel(dev_t);
275 static void raidmakedisklabel(struct raid_softc *);
276
277 static int raidlock(struct raid_softc *);
278 static void raidunlock(struct raid_softc *);
279
280 static void rf_markalldirty(RF_Raid_t *);
281 void rf_mountroot_hook(struct device *);
282
283 struct device *raidrootdev;
284
285 void rf_ReconThread(struct rf_recon_req *);
286 /* XXX what I want is: */
287 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
288 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
289 void rf_CopybackThread(RF_Raid_t *raidPtr);
290 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
291 void rf_buildroothack(void *);
292
293 RF_AutoConfig_t *rf_find_raid_components(void);
294 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
295 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
296 static int rf_reasonable_label(RF_ComponentLabel_t *);
297 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
298 int rf_set_autoconfig(RF_Raid_t *, int);
299 int rf_set_rootpartition(RF_Raid_t *, int);
300 void rf_release_all_vps(RF_ConfigSet_t *);
301 void rf_cleanup_config_set(RF_ConfigSet_t *);
302 int rf_have_enough_components(RF_ConfigSet_t *);
303 int rf_auto_config_set(RF_ConfigSet_t *, int *);
304
305 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
306 allow autoconfig to take place.
307 Note that this is overridden by having
308 RAID_AUTOCONFIG as an option in the
309 kernel config file. */
310
311 void
312 raidattach(num)
313 int num;
314 {
315 int raidID;
316 int i, rc;
317 RF_AutoConfig_t *ac_list; /* autoconfig list */
318 RF_ConfigSet_t *config_sets;
319
320 #ifdef DEBUG
321 printf("raidattach: Asked for %d units\n", num);
322 #endif
323
324 if (num <= 0) {
325 #ifdef DIAGNOSTIC
326 panic("raidattach: count <= 0");
327 #endif
328 return;
329 }
330 /* This is where all the initialization stuff gets done. */
331
332 numraid = num;
333
334 /* Make some space for requested number of units... */
335
336 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
337 if (raidPtrs == NULL) {
338 panic("raidPtrs is NULL!!\n");
339 }
340
341 /* Initialize the component buffer pool. */
342 pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
343 0, 0, "raidpl", NULL);
344
345 rc = rf_mutex_init(&rf_sparet_wait_mutex);
346 if (rc) {
347 RF_PANIC();
348 }
349
350 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
351
352 for (i = 0; i < num; i++)
353 raidPtrs[i] = NULL;
354 rc = rf_BootRaidframe();
355 if (rc == 0)
356 printf("Kernelized RAIDframe activated\n");
357 else
358 panic("Serious error booting RAID!!\n");
359
360 /* put together some datastructures like the CCD device does.. This
361 * lets us lock the device and what-not when it gets opened. */
362
363 raid_softc = (struct raid_softc *)
364 malloc(num * sizeof(struct raid_softc),
365 M_RAIDFRAME, M_NOWAIT);
366 if (raid_softc == NULL) {
367 printf("WARNING: no memory for RAIDframe driver\n");
368 return;
369 }
370
371 memset(raid_softc, 0, num * sizeof(struct raid_softc));
372
373 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
374 M_RAIDFRAME, M_NOWAIT);
375 if (raidrootdev == NULL) {
376 panic("No memory for RAIDframe driver!!?!?!\n");
377 }
378
379 for (raidID = 0; raidID < num; raidID++) {
380 BUFQ_INIT(&raid_softc[raidID].buf_queue);
381
382 raidrootdev[raidID].dv_class = DV_DISK;
383 raidrootdev[raidID].dv_cfdata = NULL;
384 raidrootdev[raidID].dv_unit = raidID;
385 raidrootdev[raidID].dv_parent = NULL;
386 raidrootdev[raidID].dv_flags = 0;
387 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
388
389 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
390 (RF_Raid_t *));
391 if (raidPtrs[raidID] == NULL) {
392 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
393 numraid = raidID;
394 return;
395 }
396 }
397
398 #ifdef RAID_AUTOCONFIG
399 raidautoconfig = 1;
400 #endif
401
402 if (raidautoconfig) {
403 /* 1. locate all RAID components on the system */
404
405 #if DEBUG
406 printf("Searching for raid components...\n");
407 #endif
408 ac_list = rf_find_raid_components();
409
410 /* 2. sort them into their respective sets */
411
412 config_sets = rf_create_auto_sets(ac_list);
413
414 /* 3. evaluate each set and configure the valid ones
415 This gets done in rf_buildroothack() */
416
417 /* schedule the creation of the thread to do the
418 "/ on RAID" stuff */
419
420 kthread_create(rf_buildroothack,config_sets);
421
422 #if 0
423 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
424 #endif
425 }
426
427 }
428
429 void
430 rf_buildroothack(arg)
431 void *arg;
432 {
433 RF_ConfigSet_t *config_sets = arg;
434 RF_ConfigSet_t *cset;
435 RF_ConfigSet_t *next_cset;
436 int retcode;
437 int raidID;
438 int rootID;
439 int num_root;
440
441 rootID = 0;
442 num_root = 0;
443 cset = config_sets;
444 while(cset != NULL ) {
445 next_cset = cset->next;
446 if (rf_have_enough_components(cset) &&
447 cset->ac->clabel->autoconfigure==1) {
448 retcode = rf_auto_config_set(cset,&raidID);
449 if (!retcode) {
450 if (cset->rootable) {
451 rootID = raidID;
452 num_root++;
453 }
454 } else {
455 /* The autoconfig didn't work :( */
456 #if DEBUG
457 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
458 #endif
459 rf_release_all_vps(cset);
460 }
461 } else {
462 /* we're not autoconfiguring this set...
463 release the associated resources */
464 rf_release_all_vps(cset);
465 }
466 /* cleanup */
467 rf_cleanup_config_set(cset);
468 cset = next_cset;
469 }
470
471 /* we found something bootable... */
472
473 if (num_root == 1) {
474 booted_device = &raidrootdev[rootID];
475 } else if (num_root > 1) {
476 /* we can't guess.. require the user to answer... */
477 boothowto |= RB_ASKNAME;
478 }
479 }
480
481
482 int
483 raidsize(dev)
484 dev_t dev;
485 {
486 struct raid_softc *rs;
487 struct disklabel *lp;
488 int part, unit, omask, size;
489
490 unit = raidunit(dev);
491 if (unit >= numraid)
492 return (-1);
493 rs = &raid_softc[unit];
494
495 if ((rs->sc_flags & RAIDF_INITED) == 0)
496 return (-1);
497
498 part = DISKPART(dev);
499 omask = rs->sc_dkdev.dk_openmask & (1 << part);
500 lp = rs->sc_dkdev.dk_label;
501
502 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
503 return (-1);
504
505 if (lp->d_partitions[part].p_fstype != FS_SWAP)
506 size = -1;
507 else
508 size = lp->d_partitions[part].p_size *
509 (lp->d_secsize / DEV_BSIZE);
510
511 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
512 return (-1);
513
514 return (size);
515
516 }
517
518 int
519 raiddump(dev, blkno, va, size)
520 dev_t dev;
521 daddr_t blkno;
522 caddr_t va;
523 size_t size;
524 {
525 /* Not implemented. */
526 return ENXIO;
527 }
528 /* ARGSUSED */
529 int
530 raidopen(dev, flags, fmt, p)
531 dev_t dev;
532 int flags, fmt;
533 struct proc *p;
534 {
535 int unit = raidunit(dev);
536 struct raid_softc *rs;
537 struct disklabel *lp;
538 int part, pmask;
539 int error = 0;
540
541 if (unit >= numraid)
542 return (ENXIO);
543 rs = &raid_softc[unit];
544
545 if ((error = raidlock(rs)) != 0)
546 return (error);
547 lp = rs->sc_dkdev.dk_label;
548
549 part = DISKPART(dev);
550 pmask = (1 << part);
551
552 db1_printf(("Opening raid device number: %d partition: %d\n",
553 unit, part));
554
555
556 if ((rs->sc_flags & RAIDF_INITED) &&
557 (rs->sc_dkdev.dk_openmask == 0))
558 raidgetdisklabel(dev);
559
560 /* make sure that this partition exists */
561
562 if (part != RAW_PART) {
563 db1_printf(("Not a raw partition..\n"));
564 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
565 ((part >= lp->d_npartitions) ||
566 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
567 error = ENXIO;
568 raidunlock(rs);
569 db1_printf(("Bailing out...\n"));
570 return (error);
571 }
572 }
573 /* Prevent this unit from being unconfigured while open. */
574 switch (fmt) {
575 case S_IFCHR:
576 rs->sc_dkdev.dk_copenmask |= pmask;
577 break;
578
579 case S_IFBLK:
580 rs->sc_dkdev.dk_bopenmask |= pmask;
581 break;
582 }
583
584 if ((rs->sc_dkdev.dk_openmask == 0) &&
585 ((rs->sc_flags & RAIDF_INITED) != 0)) {
586 /* First one... mark things as dirty... Note that we *MUST*
587 have done a configure before this. I DO NOT WANT TO BE
588 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
589 THAT THEY BELONG TOGETHER!!!!! */
590 /* XXX should check to see if we're only open for reading
591 here... If so, we needn't do this, but then need some
592 other way of keeping track of what's happened.. */
593
594 rf_markalldirty( raidPtrs[unit] );
595 }
596
597
598 rs->sc_dkdev.dk_openmask =
599 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
600
601 raidunlock(rs);
602
603 return (error);
604
605
606 }
607 /* ARGSUSED */
608 int
609 raidclose(dev, flags, fmt, p)
610 dev_t dev;
611 int flags, fmt;
612 struct proc *p;
613 {
614 int unit = raidunit(dev);
615 struct raid_softc *rs;
616 int error = 0;
617 int part;
618
619 if (unit >= numraid)
620 return (ENXIO);
621 rs = &raid_softc[unit];
622
623 if ((error = raidlock(rs)) != 0)
624 return (error);
625
626 part = DISKPART(dev);
627
628 /* ...that much closer to allowing unconfiguration... */
629 switch (fmt) {
630 case S_IFCHR:
631 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
632 break;
633
634 case S_IFBLK:
635 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
636 break;
637 }
638 rs->sc_dkdev.dk_openmask =
639 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
640
641 if ((rs->sc_dkdev.dk_openmask == 0) &&
642 ((rs->sc_flags & RAIDF_INITED) != 0)) {
643 /* Last one... device is not unconfigured yet.
644 Device shutdown has taken care of setting the
645 clean bits if RAIDF_INITED is not set
646 mark things as clean... */
647 #if 0
648 printf("Last one on raid%d. Updating status.\n",unit);
649 #endif
650 rf_update_component_labels(raidPtrs[unit],
651 RF_FINAL_COMPONENT_UPDATE);
652 if (doing_shutdown) {
653 /* last one, and we're going down, so
654 lights out for this RAID set too. */
655 error = rf_Shutdown(raidPtrs[unit]);
656
657 /* It's no longer initialized... */
658 rs->sc_flags &= ~RAIDF_INITED;
659
660 /* Detach the disk. */
661 disk_detach(&rs->sc_dkdev);
662 }
663 }
664
665 raidunlock(rs);
666 return (0);
667
668 }
669
670 void
671 raidstrategy(bp)
672 struct buf *bp;
673 {
674 int s;
675
676 unsigned int raidID = raidunit(bp->b_dev);
677 RF_Raid_t *raidPtr;
678 struct raid_softc *rs = &raid_softc[raidID];
679 struct disklabel *lp;
680 int wlabel;
681
682 if ((rs->sc_flags & RAIDF_INITED) ==0) {
683 bp->b_error = ENXIO;
684 bp->b_flags |= B_ERROR;
685 bp->b_resid = bp->b_bcount;
686 biodone(bp);
687 return;
688 }
689 if (raidID >= numraid || !raidPtrs[raidID]) {
690 bp->b_error = ENODEV;
691 bp->b_flags |= B_ERROR;
692 bp->b_resid = bp->b_bcount;
693 biodone(bp);
694 return;
695 }
696 raidPtr = raidPtrs[raidID];
697 if (!raidPtr->valid) {
698 bp->b_error = ENODEV;
699 bp->b_flags |= B_ERROR;
700 bp->b_resid = bp->b_bcount;
701 biodone(bp);
702 return;
703 }
704 if (bp->b_bcount == 0) {
705 db1_printf(("b_bcount is zero..\n"));
706 biodone(bp);
707 return;
708 }
709 lp = rs->sc_dkdev.dk_label;
710
711 /*
712 * Do bounds checking and adjust transfer. If there's an
713 * error, the bounds check will flag that for us.
714 */
715
716 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
717 if (DISKPART(bp->b_dev) != RAW_PART)
718 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
719 db1_printf(("Bounds check failed!!:%d %d\n",
720 (int) bp->b_blkno, (int) wlabel));
721 biodone(bp);
722 return;
723 }
724 s = splbio();
725
726 bp->b_resid = 0;
727
728 /* stuff it onto our queue */
729 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
730
731 raidstart(raidPtrs[raidID]);
732
733 splx(s);
734 }
735 /* ARGSUSED */
736 int
737 raidread(dev, uio, flags)
738 dev_t dev;
739 struct uio *uio;
740 int flags;
741 {
742 int unit = raidunit(dev);
743 struct raid_softc *rs;
744 int part;
745
746 if (unit >= numraid)
747 return (ENXIO);
748 rs = &raid_softc[unit];
749
750 if ((rs->sc_flags & RAIDF_INITED) == 0)
751 return (ENXIO);
752 part = DISKPART(dev);
753
754 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
755
756 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
757
758 }
759 /* ARGSUSED */
760 int
761 raidwrite(dev, uio, flags)
762 dev_t dev;
763 struct uio *uio;
764 int flags;
765 {
766 int unit = raidunit(dev);
767 struct raid_softc *rs;
768
769 if (unit >= numraid)
770 return (ENXIO);
771 rs = &raid_softc[unit];
772
773 if ((rs->sc_flags & RAIDF_INITED) == 0)
774 return (ENXIO);
775 db1_printf(("raidwrite\n"));
776 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
777
778 }
779
780 int
781 raidioctl(dev, cmd, data, flag, p)
782 dev_t dev;
783 u_long cmd;
784 caddr_t data;
785 int flag;
786 struct proc *p;
787 {
788 int unit = raidunit(dev);
789 int error = 0;
790 int part, pmask;
791 struct raid_softc *rs;
792 RF_Config_t *k_cfg, *u_cfg;
793 RF_Raid_t *raidPtr;
794 RF_RaidDisk_t *diskPtr;
795 RF_AccTotals_t *totals;
796 RF_DeviceConfig_t *d_cfg, **ucfgp;
797 u_char *specific_buf;
798 int retcode = 0;
799 int row;
800 int column;
801 int raidid;
802 struct rf_recon_req *rrcopy, *rr;
803 RF_ComponentLabel_t *clabel;
804 RF_ComponentLabel_t ci_label;
805 RF_ComponentLabel_t **clabel_ptr;
806 RF_SingleComponent_t *sparePtr,*componentPtr;
807 RF_SingleComponent_t hot_spare;
808 RF_SingleComponent_t component;
809 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
810 int i, j, d;
811 #ifdef __HAVE_OLD_DISKLABEL
812 struct disklabel newlabel;
813 #endif
814
815 if (unit >= numraid)
816 return (ENXIO);
817 rs = &raid_softc[unit];
818 raidPtr = raidPtrs[unit];
819
820 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
821 (int) DISKPART(dev), (int) unit, (int) cmd));
822
823 /* Must be open for writes for these commands... */
824 switch (cmd) {
825 case DIOCSDINFO:
826 case DIOCWDINFO:
827 #ifdef __HAVE_OLD_DISKLABEL
828 case ODIOCWDINFO:
829 case ODIOCSDINFO:
830 #endif
831 case DIOCWLABEL:
832 if ((flag & FWRITE) == 0)
833 return (EBADF);
834 }
835
836 /* Must be initialized for these... */
837 switch (cmd) {
838 case DIOCGDINFO:
839 case DIOCSDINFO:
840 case DIOCWDINFO:
841 #ifdef __HAVE_OLD_DISKLABEL
842 case ODIOCGDINFO:
843 case ODIOCWDINFO:
844 case ODIOCSDINFO:
845 case ODIOCGDEFLABEL:
846 #endif
847 case DIOCGPART:
848 case DIOCWLABEL:
849 case DIOCGDEFLABEL:
850 case RAIDFRAME_SHUTDOWN:
851 case RAIDFRAME_REWRITEPARITY:
852 case RAIDFRAME_GET_INFO:
853 case RAIDFRAME_RESET_ACCTOTALS:
854 case RAIDFRAME_GET_ACCTOTALS:
855 case RAIDFRAME_KEEP_ACCTOTALS:
856 case RAIDFRAME_GET_SIZE:
857 case RAIDFRAME_FAIL_DISK:
858 case RAIDFRAME_COPYBACK:
859 case RAIDFRAME_CHECK_RECON_STATUS:
860 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
861 case RAIDFRAME_GET_COMPONENT_LABEL:
862 case RAIDFRAME_SET_COMPONENT_LABEL:
863 case RAIDFRAME_ADD_HOT_SPARE:
864 case RAIDFRAME_REMOVE_HOT_SPARE:
865 case RAIDFRAME_INIT_LABELS:
866 case RAIDFRAME_REBUILD_IN_PLACE:
867 case RAIDFRAME_CHECK_PARITY:
868 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
869 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
870 case RAIDFRAME_CHECK_COPYBACK_STATUS:
871 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
872 case RAIDFRAME_SET_AUTOCONFIG:
873 case RAIDFRAME_SET_ROOT:
874 case RAIDFRAME_DELETE_COMPONENT:
875 case RAIDFRAME_INCORPORATE_HOT_SPARE:
876 if ((rs->sc_flags & RAIDF_INITED) == 0)
877 return (ENXIO);
878 }
879
880 switch (cmd) {
881
882 /* configure the system */
883 case RAIDFRAME_CONFIGURE:
884
885 if (raidPtr->valid) {
886 /* There is a valid RAID set running on this unit! */
887 printf("raid%d: Device already configured!\n",unit);
888 return(EINVAL);
889 }
890
891 /* copy-in the configuration information */
892 /* data points to a pointer to the configuration structure */
893
894 u_cfg = *((RF_Config_t **) data);
895 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
896 if (k_cfg == NULL) {
897 return (ENOMEM);
898 }
899 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
900 sizeof(RF_Config_t));
901 if (retcode) {
902 RF_Free(k_cfg, sizeof(RF_Config_t));
903 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
904 retcode));
905 return (retcode);
906 }
907 /* allocate a buffer for the layout-specific data, and copy it
908 * in */
909 if (k_cfg->layoutSpecificSize) {
910 if (k_cfg->layoutSpecificSize > 10000) {
911 /* sanity check */
912 RF_Free(k_cfg, sizeof(RF_Config_t));
913 return (EINVAL);
914 }
915 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
916 (u_char *));
917 if (specific_buf == NULL) {
918 RF_Free(k_cfg, sizeof(RF_Config_t));
919 return (ENOMEM);
920 }
921 retcode = copyin(k_cfg->layoutSpecific,
922 (caddr_t) specific_buf,
923 k_cfg->layoutSpecificSize);
924 if (retcode) {
925 RF_Free(k_cfg, sizeof(RF_Config_t));
926 RF_Free(specific_buf,
927 k_cfg->layoutSpecificSize);
928 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
929 retcode));
930 return (retcode);
931 }
932 } else
933 specific_buf = NULL;
934 k_cfg->layoutSpecific = specific_buf;
935
936 /* should do some kind of sanity check on the configuration.
937 * Store the sum of all the bytes in the last byte? */
938
939 /* configure the system */
940
941 /*
942 * Clear the entire RAID descriptor, just to make sure
943 * there is no stale data left in the case of a
944 * reconfiguration
945 */
946 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
947 raidPtr->raidid = unit;
948
949 retcode = rf_Configure(raidPtr, k_cfg, NULL);
950
951 if (retcode == 0) {
952
953 /* allow this many simultaneous IO's to
954 this RAID device */
955 raidPtr->openings = RAIDOUTSTANDING;
956
957 raidinit(raidPtr);
958 rf_markalldirty(raidPtr);
959 }
960 /* free the buffers. No return code here. */
961 if (k_cfg->layoutSpecificSize) {
962 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
963 }
964 RF_Free(k_cfg, sizeof(RF_Config_t));
965
966 return (retcode);
967
968 /* shutdown the system */
969 case RAIDFRAME_SHUTDOWN:
970
971 if ((error = raidlock(rs)) != 0)
972 return (error);
973
974 /*
975 * If somebody has a partition mounted, we shouldn't
976 * shutdown.
977 */
978
979 part = DISKPART(dev);
980 pmask = (1 << part);
981 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
982 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
983 (rs->sc_dkdev.dk_copenmask & pmask))) {
984 raidunlock(rs);
985 return (EBUSY);
986 }
987
988 retcode = rf_Shutdown(raidPtr);
989
990 /* It's no longer initialized... */
991 rs->sc_flags &= ~RAIDF_INITED;
992
993 /* Detach the disk. */
994 disk_detach(&rs->sc_dkdev);
995
996 raidunlock(rs);
997
998 return (retcode);
999 case RAIDFRAME_GET_COMPONENT_LABEL:
1000 clabel_ptr = (RF_ComponentLabel_t **) data;
1001 /* need to read the component label for the disk indicated
1002 by row,column in clabel */
1003
1004 /* For practice, let's get it directly fromdisk, rather
1005 than from the in-core copy */
1006 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1007 (RF_ComponentLabel_t *));
1008 if (clabel == NULL)
1009 return (ENOMEM);
1010
1011 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1012
1013 retcode = copyin( *clabel_ptr, clabel,
1014 sizeof(RF_ComponentLabel_t));
1015
1016 if (retcode) {
1017 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1018 return(retcode);
1019 }
1020
1021 row = clabel->row;
1022 column = clabel->column;
1023
1024 if ((row < 0) || (row >= raidPtr->numRow) ||
1025 (column < 0) || (column >= raidPtr->numCol +
1026 raidPtr->numSpare)) {
1027 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1028 return(EINVAL);
1029 }
1030
1031 raidread_component_label(raidPtr->Disks[row][column].dev,
1032 raidPtr->raid_cinfo[row][column].ci_vp,
1033 clabel );
1034
1035 retcode = copyout((caddr_t) clabel,
1036 (caddr_t) *clabel_ptr,
1037 sizeof(RF_ComponentLabel_t));
1038 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1039 return (retcode);
1040
1041 case RAIDFRAME_SET_COMPONENT_LABEL:
1042 clabel = (RF_ComponentLabel_t *) data;
1043
1044 /* XXX check the label for valid stuff... */
1045 /* Note that some things *should not* get modified --
1046 the user should be re-initing the labels instead of
1047 trying to patch things.
1048 */
1049
1050 raidid = raidPtr->raidid;
1051 printf("raid%d: Got component label:\n", raidid);
1052 printf("raid%d: Version: %d\n", raidid, clabel->version);
1053 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1054 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1055 printf("raid%d: Row: %d\n", raidid, clabel->row);
1056 printf("raid%d: Column: %d\n", raidid, clabel->column);
1057 printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
1058 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1059 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1060 printf("raid%d: Status: %d\n", raidid, clabel->status);
1061
1062 row = clabel->row;
1063 column = clabel->column;
1064
1065 if ((row < 0) || (row >= raidPtr->numRow) ||
1066 (column < 0) || (column >= raidPtr->numCol)) {
1067 return(EINVAL);
1068 }
1069
1070 /* XXX this isn't allowed to do anything for now :-) */
1071
1072 /* XXX and before it is, we need to fill in the rest
1073 of the fields!?!?!?! */
1074 #if 0
1075 raidwrite_component_label(
1076 raidPtr->Disks[row][column].dev,
1077 raidPtr->raid_cinfo[row][column].ci_vp,
1078 clabel );
1079 #endif
1080 return (0);
1081
1082 case RAIDFRAME_INIT_LABELS:
1083 clabel = (RF_ComponentLabel_t *) data;
1084 /*
1085 we only want the serial number from
1086 the above. We get all the rest of the information
1087 from the config that was used to create this RAID
1088 set.
1089 */
1090
1091 raidPtr->serial_number = clabel->serial_number;
1092
1093 raid_init_component_label(raidPtr, &ci_label);
1094 ci_label.serial_number = clabel->serial_number;
1095
1096 for(row=0;row<raidPtr->numRow;row++) {
1097 ci_label.row = row;
1098 for(column=0;column<raidPtr->numCol;column++) {
1099 diskPtr = &raidPtr->Disks[row][column];
1100 if (!RF_DEAD_DISK(diskPtr->status)) {
1101 ci_label.partitionSize = diskPtr->partitionSize;
1102 ci_label.column = column;
1103 raidwrite_component_label(
1104 raidPtr->Disks[row][column].dev,
1105 raidPtr->raid_cinfo[row][column].ci_vp,
1106 &ci_label );
1107 }
1108 }
1109 }
1110
1111 return (retcode);
1112 case RAIDFRAME_SET_AUTOCONFIG:
1113 d = rf_set_autoconfig(raidPtr, *(int *) data);
1114 printf("raid%d: New autoconfig value is: %d\n",
1115 raidPtr->raidid, d);
1116 *(int *) data = d;
1117 return (retcode);
1118
1119 case RAIDFRAME_SET_ROOT:
1120 d = rf_set_rootpartition(raidPtr, *(int *) data);
1121 printf("raid%d: New rootpartition value is: %d\n",
1122 raidPtr->raidid, d);
1123 *(int *) data = d;
1124 return (retcode);
1125
1126 /* initialize all parity */
1127 case RAIDFRAME_REWRITEPARITY:
1128
1129 if (raidPtr->Layout.map->faultsTolerated == 0) {
1130 /* Parity for RAID 0 is trivially correct */
1131 raidPtr->parity_good = RF_RAID_CLEAN;
1132 return(0);
1133 }
1134
1135 if (raidPtr->parity_rewrite_in_progress == 1) {
1136 /* Re-write is already in progress! */
1137 return(EINVAL);
1138 }
1139
1140 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1141 rf_RewriteParityThread,
1142 raidPtr,"raid_parity");
1143 return (retcode);
1144
1145
1146 case RAIDFRAME_ADD_HOT_SPARE:
1147 sparePtr = (RF_SingleComponent_t *) data;
1148 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1149 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1150 return(retcode);
1151
1152 case RAIDFRAME_REMOVE_HOT_SPARE:
1153 return(retcode);
1154
1155 case RAIDFRAME_DELETE_COMPONENT:
1156 componentPtr = (RF_SingleComponent_t *)data;
1157 memcpy( &component, componentPtr,
1158 sizeof(RF_SingleComponent_t));
1159 retcode = rf_delete_component(raidPtr, &component);
1160 return(retcode);
1161
1162 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1163 componentPtr = (RF_SingleComponent_t *)data;
1164 memcpy( &component, componentPtr,
1165 sizeof(RF_SingleComponent_t));
1166 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1167 return(retcode);
1168
1169 case RAIDFRAME_REBUILD_IN_PLACE:
1170
1171 if (raidPtr->Layout.map->faultsTolerated == 0) {
1172 /* Can't do this on a RAID 0!! */
1173 return(EINVAL);
1174 }
1175
1176 if (raidPtr->recon_in_progress == 1) {
1177 /* a reconstruct is already in progress! */
1178 return(EINVAL);
1179 }
1180
1181 componentPtr = (RF_SingleComponent_t *) data;
1182 memcpy( &component, componentPtr,
1183 sizeof(RF_SingleComponent_t));
1184 row = component.row;
1185 column = component.column;
1186 printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
1187 row, column);
1188 if ((row < 0) || (row >= raidPtr->numRow) ||
1189 (column < 0) || (column >= raidPtr->numCol)) {
1190 return(EINVAL);
1191 }
1192
1193 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1194 if (rrcopy == NULL)
1195 return(ENOMEM);
1196
1197 rrcopy->raidPtr = (void *) raidPtr;
1198 rrcopy->row = row;
1199 rrcopy->col = column;
1200
1201 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1202 rf_ReconstructInPlaceThread,
1203 rrcopy,"raid_reconip");
1204 return(retcode);
1205
1206 case RAIDFRAME_GET_INFO:
1207 if (!raidPtr->valid)
1208 return (ENODEV);
1209 ucfgp = (RF_DeviceConfig_t **) data;
1210 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1211 (RF_DeviceConfig_t *));
1212 if (d_cfg == NULL)
1213 return (ENOMEM);
1214 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1215 d_cfg->rows = raidPtr->numRow;
1216 d_cfg->cols = raidPtr->numCol;
1217 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1218 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1219 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1220 return (ENOMEM);
1221 }
1222 d_cfg->nspares = raidPtr->numSpare;
1223 if (d_cfg->nspares >= RF_MAX_DISKS) {
1224 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1225 return (ENOMEM);
1226 }
1227 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1228 d = 0;
1229 for (i = 0; i < d_cfg->rows; i++) {
1230 for (j = 0; j < d_cfg->cols; j++) {
1231 d_cfg->devs[d] = raidPtr->Disks[i][j];
1232 d++;
1233 }
1234 }
1235 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1236 d_cfg->spares[i] = raidPtr->Disks[0][j];
1237 }
1238 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1239 sizeof(RF_DeviceConfig_t));
1240 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1241
1242 return (retcode);
1243
1244 case RAIDFRAME_CHECK_PARITY:
1245 *(int *) data = raidPtr->parity_good;
1246 return (0);
1247
1248 case RAIDFRAME_RESET_ACCTOTALS:
1249 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1250 return (0);
1251
1252 case RAIDFRAME_GET_ACCTOTALS:
1253 totals = (RF_AccTotals_t *) data;
1254 *totals = raidPtr->acc_totals;
1255 return (0);
1256
1257 case RAIDFRAME_KEEP_ACCTOTALS:
1258 raidPtr->keep_acc_totals = *(int *)data;
1259 return (0);
1260
1261 case RAIDFRAME_GET_SIZE:
1262 *(int *) data = raidPtr->totalSectors;
1263 return (0);
1264
1265 /* fail a disk & optionally start reconstruction */
1266 case RAIDFRAME_FAIL_DISK:
1267
1268 if (raidPtr->Layout.map->faultsTolerated == 0) {
1269 /* Can't do this on a RAID 0!! */
1270 return(EINVAL);
1271 }
1272
1273 rr = (struct rf_recon_req *) data;
1274
1275 if (rr->row < 0 || rr->row >= raidPtr->numRow
1276 || rr->col < 0 || rr->col >= raidPtr->numCol)
1277 return (EINVAL);
1278
1279 printf("raid%d: Failing the disk: row: %d col: %d\n",
1280 unit, rr->row, rr->col);
1281
1282 /* make a copy of the recon request so that we don't rely on
1283 * the user's buffer */
1284 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1285 if (rrcopy == NULL)
1286 return(ENOMEM);
1287 memcpy(rrcopy, rr, sizeof(*rr));
1288 rrcopy->raidPtr = (void *) raidPtr;
1289
1290 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1291 rf_ReconThread,
1292 rrcopy,"raid_recon");
1293 return (0);
1294
1295 /* invoke a copyback operation after recon on whatever disk
1296 * needs it, if any */
1297 case RAIDFRAME_COPYBACK:
1298
1299 if (raidPtr->Layout.map->faultsTolerated == 0) {
1300 /* This makes no sense on a RAID 0!! */
1301 return(EINVAL);
1302 }
1303
1304 if (raidPtr->copyback_in_progress == 1) {
1305 /* Copyback is already in progress! */
1306 return(EINVAL);
1307 }
1308
1309 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1310 rf_CopybackThread,
1311 raidPtr,"raid_copyback");
1312 return (retcode);
1313
1314 /* return the percentage completion of reconstruction */
1315 case RAIDFRAME_CHECK_RECON_STATUS:
1316 if (raidPtr->Layout.map->faultsTolerated == 0) {
1317 /* This makes no sense on a RAID 0, so tell the
1318 user it's done. */
1319 *(int *) data = 100;
1320 return(0);
1321 }
1322 row = 0; /* XXX we only consider a single row... */
1323 if (raidPtr->status[row] != rf_rs_reconstructing)
1324 *(int *) data = 100;
1325 else
1326 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1327 return (0);
1328 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1329 progressInfoPtr = (RF_ProgressInfo_t **) data;
1330 row = 0; /* XXX we only consider a single row... */
1331 if (raidPtr->status[row] != rf_rs_reconstructing) {
1332 progressInfo.remaining = 0;
1333 progressInfo.completed = 100;
1334 progressInfo.total = 100;
1335 } else {
1336 progressInfo.total =
1337 raidPtr->reconControl[row]->numRUsTotal;
1338 progressInfo.completed =
1339 raidPtr->reconControl[row]->numRUsComplete;
1340 progressInfo.remaining = progressInfo.total -
1341 progressInfo.completed;
1342 }
1343 retcode = copyout((caddr_t) &progressInfo,
1344 (caddr_t) *progressInfoPtr,
1345 sizeof(RF_ProgressInfo_t));
1346 return (retcode);
1347
1348 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1349 if (raidPtr->Layout.map->faultsTolerated == 0) {
1350 /* This makes no sense on a RAID 0, so tell the
1351 user it's done. */
1352 *(int *) data = 100;
1353 return(0);
1354 }
1355 if (raidPtr->parity_rewrite_in_progress == 1) {
1356 *(int *) data = 100 *
1357 raidPtr->parity_rewrite_stripes_done /
1358 raidPtr->Layout.numStripe;
1359 } else {
1360 *(int *) data = 100;
1361 }
1362 return (0);
1363
1364 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1365 progressInfoPtr = (RF_ProgressInfo_t **) data;
1366 if (raidPtr->parity_rewrite_in_progress == 1) {
1367 progressInfo.total = raidPtr->Layout.numStripe;
1368 progressInfo.completed =
1369 raidPtr->parity_rewrite_stripes_done;
1370 progressInfo.remaining = progressInfo.total -
1371 progressInfo.completed;
1372 } else {
1373 progressInfo.remaining = 0;
1374 progressInfo.completed = 100;
1375 progressInfo.total = 100;
1376 }
1377 retcode = copyout((caddr_t) &progressInfo,
1378 (caddr_t) *progressInfoPtr,
1379 sizeof(RF_ProgressInfo_t));
1380 return (retcode);
1381
1382 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1383 if (raidPtr->Layout.map->faultsTolerated == 0) {
1384 /* This makes no sense on a RAID 0 */
1385 *(int *) data = 100;
1386 return(0);
1387 }
1388 if (raidPtr->copyback_in_progress == 1) {
1389 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1390 raidPtr->Layout.numStripe;
1391 } else {
1392 *(int *) data = 100;
1393 }
1394 return (0);
1395
1396 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1397 progressInfoPtr = (RF_ProgressInfo_t **) data;
1398 if (raidPtr->copyback_in_progress == 1) {
1399 progressInfo.total = raidPtr->Layout.numStripe;
1400 progressInfo.completed =
1401 raidPtr->copyback_stripes_done;
1402 progressInfo.remaining = progressInfo.total -
1403 progressInfo.completed;
1404 } else {
1405 progressInfo.remaining = 0;
1406 progressInfo.completed = 100;
1407 progressInfo.total = 100;
1408 }
1409 retcode = copyout((caddr_t) &progressInfo,
1410 (caddr_t) *progressInfoPtr,
1411 sizeof(RF_ProgressInfo_t));
1412 return (retcode);
1413
1414 /* the sparetable daemon calls this to wait for the kernel to
1415 * need a spare table. this ioctl does not return until a
1416 * spare table is needed. XXX -- calling mpsleep here in the
1417 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1418 * -- I should either compute the spare table in the kernel,
1419 * or have a different -- XXX XXX -- interface (a different
1420 * character device) for delivering the table -- XXX */
1421 #if 0
1422 case RAIDFRAME_SPARET_WAIT:
1423 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1424 while (!rf_sparet_wait_queue)
1425 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1426 waitreq = rf_sparet_wait_queue;
1427 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1428 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1429
1430 /* structure assignment */
1431 *((RF_SparetWait_t *) data) = *waitreq;
1432
1433 RF_Free(waitreq, sizeof(*waitreq));
1434 return (0);
1435
1436 /* wakes up a process waiting on SPARET_WAIT and puts an error
1437 * code in it that will cause the dameon to exit */
1438 case RAIDFRAME_ABORT_SPARET_WAIT:
1439 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1440 waitreq->fcol = -1;
1441 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1442 waitreq->next = rf_sparet_wait_queue;
1443 rf_sparet_wait_queue = waitreq;
1444 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1445 wakeup(&rf_sparet_wait_queue);
1446 return (0);
1447
1448 /* used by the spare table daemon to deliver a spare table
1449 * into the kernel */
1450 case RAIDFRAME_SEND_SPARET:
1451
1452 /* install the spare table */
1453 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1454
1455 /* respond to the requestor. the return status of the spare
1456 * table installation is passed in the "fcol" field */
1457 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1458 waitreq->fcol = retcode;
1459 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1460 waitreq->next = rf_sparet_resp_queue;
1461 rf_sparet_resp_queue = waitreq;
1462 wakeup(&rf_sparet_resp_queue);
1463 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1464
1465 return (retcode);
1466 #endif
1467
1468 default:
1469 break; /* fall through to the os-specific code below */
1470
1471 }
1472
1473 if (!raidPtr->valid)
1474 return (EINVAL);
1475
1476 /*
1477 * Add support for "regular" device ioctls here.
1478 */
1479
1480 switch (cmd) {
1481 case DIOCGDINFO:
1482 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1483 break;
1484 #ifdef __HAVE_OLD_DISKLABEL
1485 case ODIOCGDINFO:
1486 newlabel = *(rs->sc_dkdev.dk_label);
1487 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1488 return ENOTTY;
1489 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1490 break;
1491 #endif
1492
1493 case DIOCGPART:
1494 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1495 ((struct partinfo *) data)->part =
1496 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1497 break;
1498
1499 case DIOCWDINFO:
1500 case DIOCSDINFO:
1501 #ifdef __HAVE_OLD_DISKLABEL
1502 case ODIOCWDINFO:
1503 case ODIOCSDINFO:
1504 #endif
1505 {
1506 struct disklabel *lp;
1507 #ifdef __HAVE_OLD_DISKLABEL
1508 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1509 memset(&newlabel, 0, sizeof newlabel);
1510 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1511 lp = &newlabel;
1512 } else
1513 #endif
1514 lp = (struct disklabel *)data;
1515
1516 if ((error = raidlock(rs)) != 0)
1517 return (error);
1518
1519 rs->sc_flags |= RAIDF_LABELLING;
1520
1521 error = setdisklabel(rs->sc_dkdev.dk_label,
1522 lp, 0, rs->sc_dkdev.dk_cpulabel);
1523 if (error == 0) {
1524 if (cmd == DIOCWDINFO
1525 #ifdef __HAVE_OLD_DISKLABEL
1526 || cmd == ODIOCWDINFO
1527 #endif
1528 )
1529 error = writedisklabel(RAIDLABELDEV(dev),
1530 raidstrategy, rs->sc_dkdev.dk_label,
1531 rs->sc_dkdev.dk_cpulabel);
1532 }
1533 rs->sc_flags &= ~RAIDF_LABELLING;
1534
1535 raidunlock(rs);
1536
1537 if (error)
1538 return (error);
1539 break;
1540 }
1541
1542 case DIOCWLABEL:
1543 if (*(int *) data != 0)
1544 rs->sc_flags |= RAIDF_WLABEL;
1545 else
1546 rs->sc_flags &= ~RAIDF_WLABEL;
1547 break;
1548
1549 case DIOCGDEFLABEL:
1550 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1551 break;
1552
1553 #ifdef __HAVE_OLD_DISKLABEL
1554 case ODIOCGDEFLABEL:
1555 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1556 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1557 return ENOTTY;
1558 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1559 break;
1560 #endif
1561
1562 default:
1563 retcode = ENOTTY;
1564 }
1565 return (retcode);
1566
1567 }
1568
1569
1570 /* raidinit -- complete the rest of the initialization for the
1571 RAIDframe device. */
1572
1573
1574 static void
1575 raidinit(raidPtr)
1576 RF_Raid_t *raidPtr;
1577 {
1578 struct raid_softc *rs;
1579 int unit;
1580
1581 unit = raidPtr->raidid;
1582
1583 rs = &raid_softc[unit];
1584
1585 /* XXX should check return code first... */
1586 rs->sc_flags |= RAIDF_INITED;
1587
1588 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1589
1590 rs->sc_dkdev.dk_name = rs->sc_xname;
1591
1592 /* disk_attach actually creates space for the CPU disklabel, among
1593 * other things, so it's critical to call this *BEFORE* we try putzing
1594 * with disklabels. */
1595
1596 disk_attach(&rs->sc_dkdev);
1597
1598 /* XXX There may be a weird interaction here between this, and
1599 * protectedSectors, as used in RAIDframe. */
1600
1601 rs->sc_size = raidPtr->totalSectors;
1602
1603 }
1604
1605 /* wake up the daemon & tell it to get us a spare table
1606 * XXX
1607 * the entries in the queues should be tagged with the raidPtr
1608 * so that in the extremely rare case that two recons happen at once,
1609 * we know for which device were requesting a spare table
1610 * XXX
1611 *
1612 * XXX This code is not currently used. GO
1613 */
1614 int
1615 rf_GetSpareTableFromDaemon(req)
1616 RF_SparetWait_t *req;
1617 {
1618 int retcode;
1619
1620 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1621 req->next = rf_sparet_wait_queue;
1622 rf_sparet_wait_queue = req;
1623 wakeup(&rf_sparet_wait_queue);
1624
1625 /* mpsleep unlocks the mutex */
1626 while (!rf_sparet_resp_queue) {
1627 tsleep(&rf_sparet_resp_queue, PRIBIO,
1628 "raidframe getsparetable", 0);
1629 }
1630 req = rf_sparet_resp_queue;
1631 rf_sparet_resp_queue = req->next;
1632 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1633
1634 retcode = req->fcol;
1635 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1636 * alloc'd */
1637 return (retcode);
1638 }
1639
1640 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1641 * bp & passes it down.
1642 * any calls originating in the kernel must use non-blocking I/O
1643 * do some extra sanity checking to return "appropriate" error values for
1644 * certain conditions (to make some standard utilities work)
1645 *
1646 * Formerly known as: rf_DoAccessKernel
1647 */
1648 void
1649 raidstart(raidPtr)
1650 RF_Raid_t *raidPtr;
1651 {
1652 RF_SectorCount_t num_blocks, pb, sum;
1653 RF_RaidAddr_t raid_addr;
1654 int retcode;
1655 struct partition *pp;
1656 daddr_t blocknum;
1657 int unit;
1658 struct raid_softc *rs;
1659 int do_async;
1660 struct buf *bp;
1661
1662 unit = raidPtr->raidid;
1663 rs = &raid_softc[unit];
1664
1665 /* quick check to see if anything has died recently */
1666 RF_LOCK_MUTEX(raidPtr->mutex);
1667 if (raidPtr->numNewFailures > 0) {
1668 rf_update_component_labels(raidPtr,
1669 RF_NORMAL_COMPONENT_UPDATE);
1670 raidPtr->numNewFailures--;
1671 }
1672
1673 /* Check to see if we're at the limit... */
1674 while (raidPtr->openings > 0) {
1675 RF_UNLOCK_MUTEX(raidPtr->mutex);
1676
1677 /* get the next item, if any, from the queue */
1678 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1679 /* nothing more to do */
1680 return;
1681 }
1682 BUFQ_REMOVE(&rs->buf_queue, bp);
1683
1684 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1685 * partition.. Need to make it absolute to the underlying
1686 * device.. */
1687
1688 blocknum = bp->b_blkno;
1689 if (DISKPART(bp->b_dev) != RAW_PART) {
1690 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1691 blocknum += pp->p_offset;
1692 }
1693
1694 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1695 (int) blocknum));
1696
1697 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1698 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1699
1700 /* *THIS* is where we adjust what block we're going to...
1701 * but DO NOT TOUCH bp->b_blkno!!! */
1702 raid_addr = blocknum;
1703
1704 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1705 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1706 sum = raid_addr + num_blocks + pb;
1707 if (1 || rf_debugKernelAccess) {
1708 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1709 (int) raid_addr, (int) sum, (int) num_blocks,
1710 (int) pb, (int) bp->b_resid));
1711 }
1712 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1713 || (sum < num_blocks) || (sum < pb)) {
1714 bp->b_error = ENOSPC;
1715 bp->b_flags |= B_ERROR;
1716 bp->b_resid = bp->b_bcount;
1717 biodone(bp);
1718 RF_LOCK_MUTEX(raidPtr->mutex);
1719 continue;
1720 }
1721 /*
1722 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1723 */
1724
1725 if (bp->b_bcount & raidPtr->sectorMask) {
1726 bp->b_error = EINVAL;
1727 bp->b_flags |= B_ERROR;
1728 bp->b_resid = bp->b_bcount;
1729 biodone(bp);
1730 RF_LOCK_MUTEX(raidPtr->mutex);
1731 continue;
1732
1733 }
1734 db1_printf(("Calling DoAccess..\n"));
1735
1736
1737 RF_LOCK_MUTEX(raidPtr->mutex);
1738 raidPtr->openings--;
1739 RF_UNLOCK_MUTEX(raidPtr->mutex);
1740
1741 /*
1742 * Everything is async.
1743 */
1744 do_async = 1;
1745
1746 disk_busy(&rs->sc_dkdev);
1747
1748 /* XXX we're still at splbio() here... do we *really*
1749 need to be? */
1750
1751 /* don't ever condition on bp->b_flags & B_WRITE.
1752 * always condition on B_READ instead */
1753
1754 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1755 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1756 do_async, raid_addr, num_blocks,
1757 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1758
1759 RF_LOCK_MUTEX(raidPtr->mutex);
1760 }
1761 RF_UNLOCK_MUTEX(raidPtr->mutex);
1762 }
1763
1764
1765
1766
1767 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1768
1769 int
1770 rf_DispatchKernelIO(queue, req)
1771 RF_DiskQueue_t *queue;
1772 RF_DiskQueueData_t *req;
1773 {
1774 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1775 struct buf *bp;
1776 struct raidbuf *raidbp = NULL;
1777 struct raid_softc *rs;
1778 int unit;
1779 int s;
1780
1781 s=0;
1782 /* s = splbio();*/ /* want to test this */
1783 /* XXX along with the vnode, we also need the softc associated with
1784 * this device.. */
1785
1786 req->queue = queue;
1787
1788 unit = queue->raidPtr->raidid;
1789
1790 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1791
1792 if (unit >= numraid) {
1793 printf("Invalid unit number: %d %d\n", unit, numraid);
1794 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1795 }
1796 rs = &raid_softc[unit];
1797
1798 bp = req->bp;
1799 #if 1
1800 /* XXX when there is a physical disk failure, someone is passing us a
1801 * buffer that contains old stuff!! Attempt to deal with this problem
1802 * without taking a performance hit... (not sure where the real bug
1803 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1804
1805 if (bp->b_flags & B_ERROR) {
1806 bp->b_flags &= ~B_ERROR;
1807 }
1808 if (bp->b_error != 0) {
1809 bp->b_error = 0;
1810 }
1811 #endif
1812 raidbp = RAIDGETBUF(rs);
1813
1814 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1815
1816 /*
1817 * context for raidiodone
1818 */
1819 raidbp->rf_obp = bp;
1820 raidbp->req = req;
1821
1822 LIST_INIT(&raidbp->rf_buf.b_dep);
1823
1824 switch (req->type) {
1825 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1826 /* XXX need to do something extra here.. */
1827 /* I'm leaving this in, as I've never actually seen it used,
1828 * and I'd like folks to report it... GO */
1829 printf(("WAKEUP CALLED\n"));
1830 queue->numOutstanding++;
1831
1832 /* XXX need to glue the original buffer into this?? */
1833
1834 KernelWakeupFunc(&raidbp->rf_buf);
1835 break;
1836
1837 case RF_IO_TYPE_READ:
1838 case RF_IO_TYPE_WRITE:
1839
1840 if (req->tracerec) {
1841 RF_ETIMER_START(req->tracerec->timer);
1842 }
1843 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1844 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1845 req->sectorOffset, req->numSector,
1846 req->buf, KernelWakeupFunc, (void *) req,
1847 queue->raidPtr->logBytesPerSector, req->b_proc);
1848
1849 if (rf_debugKernelAccess) {
1850 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1851 (long) bp->b_blkno));
1852 }
1853 queue->numOutstanding++;
1854 queue->last_deq_sector = req->sectorOffset;
1855 /* acc wouldn't have been let in if there were any pending
1856 * reqs at any other priority */
1857 queue->curPriority = req->priority;
1858
1859 db1_printf(("Going for %c to unit %d row %d col %d\n",
1860 req->type, unit, queue->row, queue->col));
1861 db1_printf(("sector %d count %d (%d bytes) %d\n",
1862 (int) req->sectorOffset, (int) req->numSector,
1863 (int) (req->numSector <<
1864 queue->raidPtr->logBytesPerSector),
1865 (int) queue->raidPtr->logBytesPerSector));
1866 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1867 raidbp->rf_buf.b_vp->v_numoutput++;
1868 }
1869 VOP_STRATEGY(&raidbp->rf_buf);
1870
1871 break;
1872
1873 default:
1874 panic("bad req->type in rf_DispatchKernelIO");
1875 }
1876 db1_printf(("Exiting from DispatchKernelIO\n"));
1877 /* splx(s); */ /* want to test this */
1878 return (0);
1879 }
1880 /* this is the callback function associated with a I/O invoked from
1881 kernel code.
1882 */
1883 static void
1884 KernelWakeupFunc(vbp)
1885 struct buf *vbp;
1886 {
1887 RF_DiskQueueData_t *req = NULL;
1888 RF_DiskQueue_t *queue;
1889 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1890 struct buf *bp;
1891 struct raid_softc *rs;
1892 int unit;
1893 int s;
1894
1895 s = splbio();
1896 db1_printf(("recovering the request queue:\n"));
1897 req = raidbp->req;
1898
1899 bp = raidbp->rf_obp;
1900
1901 queue = (RF_DiskQueue_t *) req->queue;
1902
1903 if (raidbp->rf_buf.b_flags & B_ERROR) {
1904 bp->b_flags |= B_ERROR;
1905 bp->b_error = raidbp->rf_buf.b_error ?
1906 raidbp->rf_buf.b_error : EIO;
1907 }
1908
1909 /* XXX methinks this could be wrong... */
1910 #if 1
1911 bp->b_resid = raidbp->rf_buf.b_resid;
1912 #endif
1913
1914 if (req->tracerec) {
1915 RF_ETIMER_STOP(req->tracerec->timer);
1916 RF_ETIMER_EVAL(req->tracerec->timer);
1917 RF_LOCK_MUTEX(rf_tracing_mutex);
1918 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1919 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1920 req->tracerec->num_phys_ios++;
1921 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1922 }
1923 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1924
1925 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1926
1927
1928 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1929 * ballistic, and mark the component as hosed... */
1930
1931 if (bp->b_flags & B_ERROR) {
1932 /* Mark the disk as dead */
1933 /* but only mark it once... */
1934 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1935 rf_ds_optimal) {
1936 printf("raid%d: IO Error. Marking %s as failed.\n",
1937 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1938 queue->raidPtr->Disks[queue->row][queue->col].status =
1939 rf_ds_failed;
1940 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1941 queue->raidPtr->numFailures++;
1942 queue->raidPtr->numNewFailures++;
1943 } else { /* Disk is already dead... */
1944 /* printf("Disk already marked as dead!\n"); */
1945 }
1946
1947 }
1948
1949 rs = &raid_softc[unit];
1950 RAIDPUTBUF(rs, raidbp);
1951
1952 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1953 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1954
1955 splx(s);
1956 }
1957
1958
1959
1960 /*
1961 * initialize a buf structure for doing an I/O in the kernel.
1962 */
1963 static void
1964 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1965 logBytesPerSector, b_proc)
1966 struct buf *bp;
1967 struct vnode *b_vp;
1968 unsigned rw_flag;
1969 dev_t dev;
1970 RF_SectorNum_t startSect;
1971 RF_SectorCount_t numSect;
1972 caddr_t buf;
1973 void (*cbFunc) (struct buf *);
1974 void *cbArg;
1975 int logBytesPerSector;
1976 struct proc *b_proc;
1977 {
1978 /* bp->b_flags = B_PHYS | rw_flag; */
1979 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1980 bp->b_bcount = numSect << logBytesPerSector;
1981 bp->b_bufsize = bp->b_bcount;
1982 bp->b_error = 0;
1983 bp->b_dev = dev;
1984 bp->b_data = buf;
1985 bp->b_blkno = startSect;
1986 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1987 if (bp->b_bcount == 0) {
1988 panic("bp->b_bcount is zero in InitBP!!\n");
1989 }
1990 bp->b_proc = b_proc;
1991 bp->b_iodone = cbFunc;
1992 bp->b_vp = b_vp;
1993
1994 }
1995
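/*
 * Fabricate a default disklabel for the RAID device, based on the
 * geometry and size of the configured set.
 */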
1996 static void
1997 raidgetdefaultlabel(raidPtr, rs, lp)
1998 RF_Raid_t *raidPtr;
1999 struct raid_softc *rs;
2000 struct disklabel *lp;
2001 {
2002 db1_printf(("Building a default label...\n"));
2003 memset(lp, 0, sizeof(*lp));
2004
2005 /* fabricate a label... */
2006 lp->d_secperunit = raidPtr->totalSectors;
2007 lp->d_secsize = raidPtr->bytesPerSector;
2008 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2009 lp->d_ntracks = 4 * raidPtr->numCol;
2010 lp->d_ncylinders = raidPtr->totalSectors /
2011 (lp->d_nsectors * lp->d_ntracks);
2012 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2013
2014 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2015 lp->d_type = DTYPE_RAID;
2016 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2017 lp->d_rpm = 3600;
2018 lp->d_interleave = 1;
2019 lp->d_flags = 0;
2020
2021 lp->d_partitions[RAW_PART].p_offset = 0;
2022 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2023 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2024 lp->d_npartitions = RAW_PART + 1;
2025
2026 lp->d_magic = DISKMAGIC;
2027 lp->d_magic2 = DISKMAGIC;
2028 	lp->d_checksum = dkcksum(lp);	/* checksum the label just built */
2029
2030 }
2031 /*
2032 * Read the disklabel from the raid device. If one is not present, fake one
2033 * up.
2034 */
2035 static void
2036 raidgetdisklabel(dev)
2037 dev_t dev;
2038 {
2039 int unit = raidunit(dev);
2040 struct raid_softc *rs = &raid_softc[unit];
2041 char *errstring;
2042 struct disklabel *lp = rs->sc_dkdev.dk_label;
2043 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2044 RF_Raid_t *raidPtr;
2045
2046 db1_printf(("Getting the disklabel...\n"));
2047
2048 memset(clp, 0, sizeof(*clp));
2049
2050 raidPtr = raidPtrs[unit];
2051
2052 raidgetdefaultlabel(raidPtr, rs, lp);
2053
2054 /*
2055 * Call the generic disklabel extraction routine.
2056 */
2057 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2058 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2059 if (errstring)
2060 raidmakedisklabel(rs);
2061 else {
2062 int i;
2063 struct partition *pp;
2064
2065 /*
2066 * Sanity check whether the found disklabel is valid.
2067 *
2068 		 * This is necessary since the total size of the raid device
2069 		 * may vary when the interleave is changed even though exactly
2070 		 * the same components are used, and an old disklabel may be
2071 		 * used if one is found.
2072 */
2073 if (lp->d_secperunit != rs->sc_size)
2074 printf("raid%d: WARNING: %s: "
2075 			    "total number of sectors in disklabel (%d) != "
2076 "the size of raid (%ld)\n", unit, rs->sc_xname,
2077 lp->d_secperunit, (long) rs->sc_size);
2078 for (i = 0; i < lp->d_npartitions; i++) {
2079 pp = &lp->d_partitions[i];
2080 if (pp->p_offset + pp->p_size > rs->sc_size)
2081 printf("raid%d: WARNING: %s: end of partition `%c' "
2082 "exceeds the size of raid (%ld)\n",
2083 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2084 }
2085 }
2086
2087 }
2088 /*
2089 * Take care of things one might want to take care of in the event
2090 * that a disklabel isn't present.
2091 */
2092 static void
2093 raidmakedisklabel(rs)
2094 struct raid_softc *rs;
2095 {
2096 struct disklabel *lp = rs->sc_dkdev.dk_label;
2097 db1_printf(("Making a label..\n"));
2098
2099 /*
2100 * For historical reasons, if there's no disklabel present
2101 * the raw partition must be marked FS_BSDFFS.
2102 */
2103
2104 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2105
2106 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2107
2108 lp->d_checksum = dkcksum(lp);
2109 }
2110 /*
2111 * Lookup the provided name in the filesystem. If the file exists,
2112 * is a valid block device, and isn't being used by anyone else,
2113 * set *vpp to the file's vnode.
2114 * You'll find the original of this in ccd.c
2115 */
2116 int
2117 raidlookup(path, p, vpp)
2118 char *path;
2119 struct proc *p;
2120 struct vnode **vpp; /* result */
2121 {
2122 struct nameidata nd;
2123 struct vnode *vp;
2124 struct vattr va;
2125 int error;
2126
2127 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2128 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2129 #if 0
2130 printf("RAIDframe: vn_open returned %d\n", error);
2131 #endif
2132 return (error);
2133 }
2134 vp = nd.ni_vp;
2135 if (vp->v_usecount > 1) {
2136 VOP_UNLOCK(vp, 0);
2137 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2138 return (EBUSY);
2139 }
2140 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2141 VOP_UNLOCK(vp, 0);
2142 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2143 return (error);
2144 }
2145 /* XXX: eventually we should handle VREG, too. */
2146 if (va.va_type != VBLK) {
2147 VOP_UNLOCK(vp, 0);
2148 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2149 return (ENOTBLK);
2150 }
2151 VOP_UNLOCK(vp, 0);
2152 *vpp = vp;
2153 return (0);
2154 }
2155 /*
2156 * Wait interruptibly for an exclusive lock.
2157 *
2158 * XXX
2159 * Several drivers do this; it should be abstracted and made MP-safe.
2160 * (Hmm... where have we seen this warning before :-> GO )
2161 */
2162 static int
2163 raidlock(rs)
2164 struct raid_softc *rs;
2165 {
2166 int error;
2167
2168 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2169 rs->sc_flags |= RAIDF_WANTED;
2170 if ((error =
2171 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2172 return (error);
2173 }
2174 rs->sc_flags |= RAIDF_LOCKED;
2175 return (0);
2176 }
2177 /*
2178 * Unlock and wake up any waiters.
2179 */
2180 static void
2181 raidunlock(rs)
2182 struct raid_softc *rs;
2183 {
2184
2185 rs->sc_flags &= ~RAIDF_LOCKED;
2186 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2187 rs->sc_flags &= ~RAIDF_WANTED;
2188 wakeup(rs);
2189 }
2190 }
2191
2192
2193 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2194 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2195
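/*
 * Mark the component label on the given component as clean, updating
 * its modification counter.
 */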
2196 int
2197 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2198 {
2199 RF_ComponentLabel_t clabel;
2200 raidread_component_label(dev, b_vp, &clabel);
2201 clabel.mod_counter = mod_counter;
2202 clabel.clean = RF_RAID_CLEAN;
2203 raidwrite_component_label(dev, b_vp, &clabel);
2204 return(0);
2205 }
2206
2207
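/*
 * Mark the component label on the given component as dirty, updating
 * its modification counter.
 */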
2208 int
2209 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2210 {
2211 RF_ComponentLabel_t clabel;
2212 raidread_component_label(dev, b_vp, &clabel);
2213 clabel.mod_counter = mod_counter;
2214 clabel.clean = RF_RAID_DIRTY;
2215 raidwrite_component_label(dev, b_vp, &clabel);
2216 return(0);
2217 }
2218
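/*
 * Read the component label from the given component device into *clabel.
 */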
2219 /* ARGSUSED */
2220 int
2221 raidread_component_label(dev, b_vp, clabel)
2222 dev_t dev;
2223 struct vnode *b_vp;
2224 RF_ComponentLabel_t *clabel;
2225 {
2226 struct buf *bp;
2227 const struct bdevsw *bdev;
2228 int error;
2229
2230 /* XXX should probably ensure that we don't try to do this if
2231 someone has changed rf_protected_sectors. */
2232
2233 if (b_vp == NULL) {
2234 /* For whatever reason, this component is not valid.
2235 Don't try to read a component label from it. */
2236 return(EINVAL);
2237 }
2238
2239 /* get a block of the appropriate size... */
2240 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2241 bp->b_dev = dev;
2242
2243 /* get our ducks in a row for the read */
2244 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2245 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2246 bp->b_flags |= B_READ;
2247 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2248
2249 bdev = bdevsw_lookup(bp->b_dev);
2250 if (bdev == NULL)
2251 return (ENXIO);
2252 (*bdev->d_strategy)(bp);
2253
2254 error = biowait(bp);
2255
2256 if (!error) {
2257 memcpy(clabel, bp->b_data,
2258 sizeof(RF_ComponentLabel_t));
2259 #if 0
2260 rf_print_component_label( clabel );
2261 #endif
2262 } else {
2263 #if 0
2264 printf("Failed to read RAID component label!\n");
2265 #endif
2266 }
2267
2268 brelse(bp);
2269 return(error);
2270 }
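/*
 * Write *clabel out to the component label area of the given component.
 */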
2271 /* ARGSUSED */
2272 int
2273 raidwrite_component_label(dev, b_vp, clabel)
2274 dev_t dev;
2275 struct vnode *b_vp;
2276 RF_ComponentLabel_t *clabel;
2277 {
2278 struct buf *bp;
2279 const struct bdevsw *bdev;
2280 int error;
2281
2282 /* get a block of the appropriate size... */
2283 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2284 bp->b_dev = dev;
2285
2286 /* get our ducks in a row for the write */
2287 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2288 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2289 bp->b_flags |= B_WRITE;
2290 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2291
2292 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2293
2294 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2295
2296 bdev = bdevsw_lookup(bp->b_dev);
2297 if (bdev == NULL)
2298 return (ENXIO);
2299 (*bdev->d_strategy)(bp);
2300 error = biowait(bp);
2301 brelse(bp);
2302 if (error) {
2303 #if 1
2304 printf("Failed to write RAID component info!\n");
2305 #endif
2306 }
2307
2308 return(error);
2309 }
2310
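/*
 * Bump the modification counter and mark the component labels of all
 * non-failed components as dirty.
 */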
2311 void
2312 rf_markalldirty(raidPtr)
2313 RF_Raid_t *raidPtr;
2314 {
2315 RF_ComponentLabel_t clabel;
2316 int r,c;
2317
2318 raidPtr->mod_counter++;
2319 for (r = 0; r < raidPtr->numRow; r++) {
2320 for (c = 0; c < raidPtr->numCol; c++) {
2321 /* we don't want to touch (at all) a disk that has
2322 failed */
2323 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2324 raidread_component_label(
2325 raidPtr->Disks[r][c].dev,
2326 raidPtr->raid_cinfo[r][c].ci_vp,
2327 &clabel);
2328 if (clabel.status == rf_ds_spared) {
2329 /* XXX do something special...
2330 but whatever you do, don't
2331 try to access it!! */
2332 } else {
2333 #if 0
2334 clabel.status =
2335 raidPtr->Disks[r][c].status;
2336 raidwrite_component_label(
2337 raidPtr->Disks[r][c].dev,
2338 raidPtr->raid_cinfo[r][c].ci_vp,
2339 &clabel);
2340 #endif
2341 raidmarkdirty(
2342 raidPtr->Disks[r][c].dev,
2343 raidPtr->raid_cinfo[r][c].ci_vp,
2344 raidPtr->mod_counter);
2345 }
2346 }
2347 }
2348 }
2349 /* printf("Component labels marked dirty.\n"); */
2350 #if 0
2351 for( c = 0; c < raidPtr->numSpare ; c++) {
2352 sparecol = raidPtr->numCol + c;
2353 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2354 /*
2355
2356 XXX this is where we get fancy and map this spare
2357 	   into its correct spot in the array.
2358
2359 */
2360 /*
2361
2362 we claim this disk is "optimal" if it's
2363 rf_ds_used_spare, as that means it should be
2364 directly substitutable for the disk it replaced.
2365 We note that too...
2366
2367 */
2368
2369 for(i=0;i<raidPtr->numRow;i++) {
2370 for(j=0;j<raidPtr->numCol;j++) {
2371 if ((raidPtr->Disks[i][j].spareRow ==
2372 r) &&
2373 (raidPtr->Disks[i][j].spareCol ==
2374 sparecol)) {
2375 srow = r;
2376 scol = sparecol;
2377 break;
2378 }
2379 }
2380 }
2381
2382 raidread_component_label(
2383 raidPtr->Disks[r][sparecol].dev,
2384 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2385 &clabel);
2386 /* make sure status is noted */
2387 clabel.version = RF_COMPONENT_LABEL_VERSION;
2388 clabel.mod_counter = raidPtr->mod_counter;
2389 clabel.serial_number = raidPtr->serial_number;
2390 clabel.row = srow;
2391 clabel.column = scol;
2392 clabel.num_rows = raidPtr->numRow;
2393 clabel.num_columns = raidPtr->numCol;
2394 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2395 clabel.status = rf_ds_optimal;
2396 raidwrite_component_label(
2397 raidPtr->Disks[r][sparecol].dev,
2398 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2399 &clabel);
2400 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2401 				raidPtr->raid_cinfo[r][sparecol].ci_vp, raidPtr->mod_counter);
2402 }
2403 }
2404
2405 #endif
2406 }
2407
2408
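/*
 * Re-write the component labels on all optimal components (and any
 * used spares), bumping the modification counter, and mark them clean
 * if this is the final update and the parity is known to be good.
 */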
2409 void
2410 rf_update_component_labels(raidPtr, final)
2411 RF_Raid_t *raidPtr;
2412 int final;
2413 {
2414 RF_ComponentLabel_t clabel;
2415 int sparecol;
2416 int r,c;
2417 int i,j;
2418 int srow, scol;
2419
2420 srow = -1;
2421 scol = -1;
2422
2423 /* XXX should do extra checks to make sure things really are clean,
2424 rather than blindly setting the clean bit... */
2425
2426 raidPtr->mod_counter++;
2427
2428 for (r = 0; r < raidPtr->numRow; r++) {
2429 for (c = 0; c < raidPtr->numCol; c++) {
2430 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2431 raidread_component_label(
2432 raidPtr->Disks[r][c].dev,
2433 raidPtr->raid_cinfo[r][c].ci_vp,
2434 &clabel);
2435 /* make sure status is noted */
2436 clabel.status = rf_ds_optimal;
2437 /* bump the counter */
2438 clabel.mod_counter = raidPtr->mod_counter;
2439
2440 raidwrite_component_label(
2441 raidPtr->Disks[r][c].dev,
2442 raidPtr->raid_cinfo[r][c].ci_vp,
2443 &clabel);
2444 if (final == RF_FINAL_COMPONENT_UPDATE) {
2445 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2446 raidmarkclean(
2447 raidPtr->Disks[r][c].dev,
2448 raidPtr->raid_cinfo[r][c].ci_vp,
2449 raidPtr->mod_counter);
2450 }
2451 }
2452 }
2453 /* else we don't touch it.. */
2454 }
2455 }
2456
2457 for( c = 0; c < raidPtr->numSpare ; c++) {
2458 sparecol = raidPtr->numCol + c;
2459 /* Need to ensure that the reconstruct actually completed! */
2460 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2461 /*
2462
2463 we claim this disk is "optimal" if it's
2464 rf_ds_used_spare, as that means it should be
2465 directly substitutable for the disk it replaced.
2466 We note that too...
2467
2468 */
2469
2470 for(i=0;i<raidPtr->numRow;i++) {
2471 for(j=0;j<raidPtr->numCol;j++) {
2472 if ((raidPtr->Disks[i][j].spareRow ==
2473 0) &&
2474 (raidPtr->Disks[i][j].spareCol ==
2475 sparecol)) {
2476 srow = i;
2477 scol = j;
2478 break;
2479 }
2480 }
2481 }
2482
2483 /* XXX shouldn't *really* need this... */
2484 raidread_component_label(
2485 raidPtr->Disks[0][sparecol].dev,
2486 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2487 &clabel);
2488 /* make sure status is noted */
2489
2490 raid_init_component_label(raidPtr, &clabel);
2491
2492 clabel.mod_counter = raidPtr->mod_counter;
2493 clabel.row = srow;
2494 clabel.column = scol;
2495 clabel.status = rf_ds_optimal;
2496
2497 raidwrite_component_label(
2498 raidPtr->Disks[0][sparecol].dev,
2499 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2500 &clabel);
2501 if (final == RF_FINAL_COMPONENT_UPDATE) {
2502 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2503 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2504 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2505 raidPtr->mod_counter);
2506 }
2507 }
2508 }
2509 }
2510 /* printf("Component labels updated\n"); */
2511 }
2512
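/*
 * Close the vnode associated with a component.  Auto-configured
 * components are closed via VOP_CLOSE() with no credentials; others
 * via vn_close() using the engine thread's credentials.
 */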
2513 void
2514 rf_close_component(raidPtr, vp, auto_configured)
2515 RF_Raid_t *raidPtr;
2516 struct vnode *vp;
2517 int auto_configured;
2518 {
2519 struct proc *p;
2520
2521 p = raidPtr->engine_thread;
2522
2523 if (vp != NULL) {
2524 if (auto_configured == 1) {
2525 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2526 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2527 vput(vp);
2528
2529 } else {
2530 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2531 }
2532 } else {
2533 #if 0
2534 printf("vnode was NULL\n");
2535 #endif
2536 }
2537 }
2538
2539
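/*
 * Close and release the vnodes for all components and spares of the
 * given RAID set.
 */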
2540 void
2541 rf_UnconfigureVnodes(raidPtr)
2542 RF_Raid_t *raidPtr;
2543 {
2544 int r,c;
2545 struct proc *p;
2546 struct vnode *vp;
2547 int acd;
2548
2549
2550 /* We take this opportunity to close the vnodes like we should.. */
2551
2552 p = raidPtr->engine_thread;
2553
2554 for (r = 0; r < raidPtr->numRow; r++) {
2555 for (c = 0; c < raidPtr->numCol; c++) {
2556 #if 0
2557 printf("raid%d: Closing vnode for row: %d col: %d\n",
2558 raidPtr->raidid, r, c);
2559 #endif
2560 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2561 acd = raidPtr->Disks[r][c].auto_configured;
2562 rf_close_component(raidPtr, vp, acd);
2563 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2564 raidPtr->Disks[r][c].auto_configured = 0;
2565 }
2566 }
2567 for (r = 0; r < raidPtr->numSpare; r++) {
2568 #if 0
2569 printf("raid%d: Closing vnode for spare: %d\n",
2570 raidPtr->raidid, r);
2571 #endif
2572 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2573 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2574 rf_close_component(raidPtr, vp, acd);
2575 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2576 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2577 }
2578 }
2579
2580
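/*
 * Kernel thread body: fail the indicated component and (optionally)
 * start reconstruction onto a spare, then exit.
 */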
2581 void
2582 rf_ReconThread(req)
2583 struct rf_recon_req *req;
2584 {
2585 int s;
2586 RF_Raid_t *raidPtr;
2587
2588 s = splbio();
2589 raidPtr = (RF_Raid_t *) req->raidPtr;
2590 raidPtr->recon_in_progress = 1;
2591
2592 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2593 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2594
2595 /* XXX get rid of this! we don't need it at all.. */
2596 RF_Free(req, sizeof(*req));
2597
2598 raidPtr->recon_in_progress = 0;
2599 splx(s);
2600
2601 /* That's all... */
2602 kthread_exit(0); /* does not return */
2603 }
2604
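/*
 * Kernel thread body: re-write all parity for the set, mark the parity
 * as clean on success, wake any waiters, then exit.
 */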
2605 void
2606 rf_RewriteParityThread(raidPtr)
2607 RF_Raid_t *raidPtr;
2608 {
2609 int retcode;
2610 int s;
2611
2612 raidPtr->parity_rewrite_in_progress = 1;
2613 s = splbio();
2614 retcode = rf_RewriteParity(raidPtr);
2615 splx(s);
2616 if (retcode) {
2617 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2618 } else {
2619 /* set the clean bit! If we shutdown correctly,
2620 the clean bit on each component label will get
2621 set */
2622 raidPtr->parity_good = RF_RAID_CLEAN;
2623 }
2624 raidPtr->parity_rewrite_in_progress = 0;
2625
2626 /* Anyone waiting for us to stop? If so, inform them... */
2627 if (raidPtr->waitShutdown) {
2628 wakeup(&raidPtr->parity_rewrite_in_progress);
2629 }
2630
2631 /* That's all... */
2632 kthread_exit(0); /* does not return */
2633 }
2634
2635
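/*
 * Kernel thread body: copy reconstructed data back from a spare to the
 * replaced component, then exit.
 */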
2636 void
2637 rf_CopybackThread(raidPtr)
2638 RF_Raid_t *raidPtr;
2639 {
2640 int s;
2641
2642 raidPtr->copyback_in_progress = 1;
2643 s = splbio();
2644 rf_CopybackReconstructedData(raidPtr);
2645 splx(s);
2646 raidPtr->copyback_in_progress = 0;
2647
2648 /* That's all... */
2649 kthread_exit(0); /* does not return */
2650 }
2651
2652
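/*
 * Kernel thread body: reconstruct the indicated component in place,
 * then exit.
 */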
2653 void
2654 rf_ReconstructInPlaceThread(req)
2655 struct rf_recon_req *req;
2656 {
2657 int retcode;
2658 int s;
2659 RF_Raid_t *raidPtr;
2660
2661 s = splbio();
2662 raidPtr = req->raidPtr;
2663 raidPtr->recon_in_progress = 1;
2664 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2665 RF_Free(req, sizeof(*req));
2666 raidPtr->recon_in_progress = 0;
2667 splx(s);
2668
2669 /* That's all... */
2670 kthread_exit(0); /* does not return */
2671 }
2672
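/* Hook called when the root filesystem is mounted -- currently a no-op. */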
2673 void
2674 rf_mountroot_hook(dev)
2675 struct device *dev;
2676 {
2677
2678 }
2679
2680
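/*
 * Scan all disk devices in the system, looking for partitions of type
 * FS_RAID that carry a plausible component label.  Each one found is
 * added to an RF_AutoConfig_t list, which is returned to the caller.
 */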
2681 RF_AutoConfig_t *
2682 rf_find_raid_components()
2683 {
2684 struct vnode *vp;
2685 struct disklabel label;
2686 struct device *dv;
2687 dev_t dev;
2688 int bmajor;
2689 int error;
2690 int i;
2691 int good_one;
2692 RF_ComponentLabel_t *clabel;
2693 RF_AutoConfig_t *ac_list;
2694 RF_AutoConfig_t *ac;
2695
2696
2697 /* initialize the AutoConfig list */
2698 ac_list = NULL;
2699
2700 /* we begin by trolling through *all* the devices on the system */
2701
2702 for (dv = alldevs.tqh_first; dv != NULL;
2703 dv = dv->dv_list.tqe_next) {
2704
2705 /* we are only interested in disks... */
2706 if (dv->dv_class != DV_DISK)
2707 continue;
2708
2709 /* we don't care about floppies... */
2710 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2711 continue;
2712 }
2713 /* hdfd is the Atari/Hades floppy driver */
2714 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
2715 continue;
2716 }
2717 /* fdisa is the Atari/Milan floppy driver */
2718 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fdisa")) {
2719 continue;
2720 }
2721
2722 /* need to find the device_name_to_block_device_major stuff */
2723 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2724
2725 /* get a vnode for the raw partition of this disk */
2726
2727 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2728 if (bdevvp(dev, &vp))
2729 panic("RAID can't alloc vnode");
2730
2731 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2732
2733 if (error) {
2734 /* "Who cares." Continue looking
2735 for something that exists*/
2736 vput(vp);
2737 continue;
2738 }
2739
2740 /* Ok, the disk exists. Go get the disklabel. */
2741 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2742 FREAD, NOCRED, 0);
2743 if (error) {
2744 /*
2745 * XXX can't happen - open() would
2746 * have errored out (or faked up one)
2747 */
2748 printf("can't get label for dev %s%c (%d)!?!?\n",
2749 dv->dv_xname, 'a' + RAW_PART, error);
2750 }
2751
2752 /* don't need this any more. We'll allocate it again
2753 a little later if we really do... */
2754 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2755 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2756 vput(vp);
2757
2758 for (i=0; i < label.d_npartitions; i++) {
2759 /* We only support partitions marked as RAID */
2760 if (label.d_partitions[i].p_fstype != FS_RAID)
2761 continue;
2762
2763 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2764 if (bdevvp(dev, &vp))
2765 panic("RAID can't alloc vnode");
2766
2767 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2768 if (error) {
2769 /* Whatever... */
2770 vput(vp);
2771 continue;
2772 }
2773
2774 good_one = 0;
2775
2776 clabel = (RF_ComponentLabel_t *)
2777 malloc(sizeof(RF_ComponentLabel_t),
2778 M_RAIDFRAME, M_NOWAIT);
2779 if (clabel == NULL) {
2780 /* XXX CLEANUP HERE */
2781 printf("RAID auto config: out of memory!\n");
2782 return(NULL); /* XXX probably should panic? */
2783 }
2784
2785 if (!raidread_component_label(dev, vp, clabel)) {
2786 /* Got the label. Does it look reasonable? */
2787 if (rf_reasonable_label(clabel) &&
2788 (clabel->partitionSize <=
2789 label.d_partitions[i].p_size)) {
2790 #if DEBUG
2791 printf("Component on: %s%c: %d\n",
2792 dv->dv_xname, 'a'+i,
2793 label.d_partitions[i].p_size);
2794 rf_print_component_label(clabel);
2795 #endif
2796 /* if it's reasonable, add it,
2797 else ignore it. */
2798 ac = (RF_AutoConfig_t *)
2799 malloc(sizeof(RF_AutoConfig_t),
2800 M_RAIDFRAME,
2801 M_NOWAIT);
2802 if (ac == NULL) {
2803 /* XXX should panic?? */
2804 return(NULL);
2805 }
2806
2807 sprintf(ac->devname, "%s%c",
2808 dv->dv_xname, 'a'+i);
2809 ac->dev = dev;
2810 ac->vp = vp;
2811 ac->clabel = clabel;
2812 ac->next = ac_list;
2813 ac_list = ac;
2814 good_one = 1;
2815 }
2816 }
2817 if (!good_one) {
2818 /* cleanup */
2819 free(clabel, M_RAIDFRAME);
2820 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2821 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2822 vput(vp);
2823 }
2824 }
2825 }
2826 return(ac_list);
2827 }
2828
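/*
 * Return non-zero if the given component label passes basic sanity
 * checks (known version, sane clean flag, row/column within range,
 * non-zero block size and block count).
 */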
2829 static int
2830 rf_reasonable_label(clabel)
2831 RF_ComponentLabel_t *clabel;
2832 {
2833
2834 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2835 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2836 ((clabel->clean == RF_RAID_CLEAN) ||
2837 (clabel->clean == RF_RAID_DIRTY)) &&
2838 clabel->row >=0 &&
2839 clabel->column >= 0 &&
2840 clabel->num_rows > 0 &&
2841 clabel->num_columns > 0 &&
2842 clabel->row < clabel->num_rows &&
2843 clabel->column < clabel->num_columns &&
2844 clabel->blockSize > 0 &&
2845 clabel->numBlocks > 0) {
2846 /* label looks reasonable enough... */
2847 return(1);
2848 }
2849 return(0);
2850 }
2851
2852
2853 void
2854 rf_print_component_label(clabel)
2855 RF_ComponentLabel_t *clabel;
2856 {
2857 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2858 clabel->row, clabel->column,
2859 clabel->num_rows, clabel->num_columns);
2860 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2861 clabel->version, clabel->serial_number,
2862 clabel->mod_counter);
2863 printf(" Clean: %s Status: %d\n",
2864 clabel->clean ? "Yes" : "No", clabel->status );
2865 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2866 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2867 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2868 (char) clabel->parityConfig, clabel->blockSize,
2869 clabel->numBlocks);
2870 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2871 printf(" Contains root partition: %s\n",
2872 clabel->root_partition ? "Yes" : "No" );
2873 printf(" Last configured as: raid%d\n", clabel->last_unit );
2874 #if 0
2875 printf(" Config order: %d\n", clabel->config_order);
2876 #endif
2877
2878 }
2879
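/*
 * Sort the components found by rf_find_raid_components() into
 * configuration sets, grouping together components whose labels
 * indicate they belong to the same RAID set.
 */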
2880 RF_ConfigSet_t *
2881 rf_create_auto_sets(ac_list)
2882 RF_AutoConfig_t *ac_list;
2883 {
2884 RF_AutoConfig_t *ac;
2885 RF_ConfigSet_t *config_sets;
2886 RF_ConfigSet_t *cset;
2887 RF_AutoConfig_t *ac_next;
2888
2889
2890 config_sets = NULL;
2891
2892 /* Go through the AutoConfig list, and figure out which components
2893 belong to what sets. */
2894 ac = ac_list;
2895 while(ac!=NULL) {
2896 /* we're going to putz with ac->next, so save it here
2897 for use at the end of the loop */
2898 ac_next = ac->next;
2899
2900 if (config_sets == NULL) {
2901 /* will need at least this one... */
2902 config_sets = (RF_ConfigSet_t *)
2903 malloc(sizeof(RF_ConfigSet_t),
2904 M_RAIDFRAME, M_NOWAIT);
2905 if (config_sets == NULL) {
2906 panic("rf_create_auto_sets: No memory!\n");
2907 }
2908 /* this one is easy :) */
2909 config_sets->ac = ac;
2910 config_sets->next = NULL;
2911 config_sets->rootable = 0;
2912 ac->next = NULL;
2913 } else {
2914 /* which set does this component fit into? */
2915 cset = config_sets;
2916 while(cset!=NULL) {
2917 if (rf_does_it_fit(cset, ac)) {
2918 /* looks like it matches... */
2919 ac->next = cset->ac;
2920 cset->ac = ac;
2921 break;
2922 }
2923 cset = cset->next;
2924 }
2925 if (cset==NULL) {
2926 /* didn't find a match above... new set..*/
2927 cset = (RF_ConfigSet_t *)
2928 malloc(sizeof(RF_ConfigSet_t),
2929 M_RAIDFRAME, M_NOWAIT);
2930 if (cset == NULL) {
2931 panic("rf_create_auto_sets: No memory!\n");
2932 }
2933 cset->ac = ac;
2934 ac->next = NULL;
2935 cset->next = config_sets;
2936 cset->rootable = 0;
2937 config_sets = cset;
2938 }
2939 }
2940 ac = ac_next;
2941 }
2942
2943
2944 return(config_sets);
2945 }
2946
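/*
 * Return non-zero if component 'ac' appears to belong to the
 * configuration set 'cset', based on a comparison of its component
 * label with that of the first member of the set.
 */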
2947 static int
2948 rf_does_it_fit(cset, ac)
2949 RF_ConfigSet_t *cset;
2950 RF_AutoConfig_t *ac;
2951 {
2952 RF_ComponentLabel_t *clabel1, *clabel2;
2953
2954 /* If this one matches the *first* one in the set, that's good
2955 enough, since the other members of the set would have been
2956 through here too... */
2957 /* note that we are not checking partitionSize here..
2958
2959 Note that we are also not checking the mod_counters here.
2960 	   If everything else matches except the mod_counter, that's
2961 good enough for this test. We will deal with the mod_counters
2962 a little later in the autoconfiguration process.
2963
2964 (clabel1->mod_counter == clabel2->mod_counter) &&
2965
2966 The reason we don't check for this is that failed disks
2967 will have lower modification counts. If those disks are
2968 not added to the set they used to belong to, then they will
2969 form their own set, which may result in 2 different sets,
2970 for example, competing to be configured at raid0, and
2971 perhaps competing to be the root filesystem set. If the
2972 wrong ones get configured, or both attempt to become /,
2973 	   weird behaviour and/or serious lossage will occur.  Thus we
2974 need to bring them into the fold here, and kick them out at
2975 a later point.
2976
2977 */
2978
2979 clabel1 = cset->ac->clabel;
2980 clabel2 = ac->clabel;
2981 if ((clabel1->version == clabel2->version) &&
2982 (clabel1->serial_number == clabel2->serial_number) &&
2983 (clabel1->num_rows == clabel2->num_rows) &&
2984 (clabel1->num_columns == clabel2->num_columns) &&
2985 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2986 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2987 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2988 (clabel1->parityConfig == clabel2->parityConfig) &&
2989 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2990 (clabel1->blockSize == clabel2->blockSize) &&
2991 (clabel1->numBlocks == clabel2->numBlocks) &&
2992 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2993 (clabel1->root_partition == clabel2->root_partition) &&
2994 (clabel1->last_unit == clabel2->last_unit) &&
2995 (clabel1->config_order == clabel2->config_order)) {
2996 		/* if it gets here, it almost *has* to be a match */
2997 } else {
2998 /* it's not consistent with somebody in the set..
2999 punt */
3000 return(0);
3001 }
3002 /* all was fine.. it must fit... */
3003 return(1);
3004 }
3005
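/*
 * Return non-zero if the configuration set contains enough live
 * components (given its RAID level) to be successfully configured.
 */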
3006 int
3007 rf_have_enough_components(cset)
3008 RF_ConfigSet_t *cset;
3009 {
3010 RF_AutoConfig_t *ac;
3011 RF_AutoConfig_t *auto_config;
3012 RF_ComponentLabel_t *clabel;
3013 int r,c;
3014 int num_rows;
3015 int num_cols;
3016 int num_missing;
3017 int mod_counter;
3018 int mod_counter_found;
3019 int even_pair_failed;
3020 char parity_type;
3021
3022
3023 /* check to see that we have enough 'live' components
3024 of this set. If so, we can configure it if necessary */
3025
3026 num_rows = cset->ac->clabel->num_rows;
3027 num_cols = cset->ac->clabel->num_columns;
3028 parity_type = cset->ac->clabel->parityConfig;
3029
3030 /* XXX Check for duplicate components!?!?!? */
3031
3032 /* Determine what the mod_counter is supposed to be for this set. */
3033
3034 mod_counter_found = 0;
3035 mod_counter = 0;
3036 ac = cset->ac;
3037 while(ac!=NULL) {
3038 if (mod_counter_found==0) {
3039 mod_counter = ac->clabel->mod_counter;
3040 mod_counter_found = 1;
3041 } else {
3042 if (ac->clabel->mod_counter > mod_counter) {
3043 mod_counter = ac->clabel->mod_counter;
3044 }
3045 }
3046 ac = ac->next;
3047 }
3048
3049 num_missing = 0;
3050 auto_config = cset->ac;
3051
3052 for(r=0; r<num_rows; r++) {
3053 even_pair_failed = 0;
3054 for(c=0; c<num_cols; c++) {
3055 ac = auto_config;
3056 while(ac!=NULL) {
3057 if ((ac->clabel->row == r) &&
3058 (ac->clabel->column == c) &&
3059 (ac->clabel->mod_counter == mod_counter)) {
3060 /* it's this one... */
3061 #if DEBUG
3062 printf("Found: %s at %d,%d\n",
3063 ac->devname,r,c);
3064 #endif
3065 break;
3066 }
3067 ac=ac->next;
3068 }
3069 if (ac==NULL) {
3070 /* Didn't find one here! */
3071 /* special case for RAID 1, especially
3072 where there are more than 2
3073 components (where RAIDframe treats
3074 things a little differently :( ) */
3075 if (parity_type == '1') {
3076 if (c%2 == 0) { /* even component */
3077 even_pair_failed = 1;
3078 } else { /* odd component. If
3079 we're failed, and
3080 so is the even
3081 component, it's
3082 "Good Night, Charlie" */
3083 if (even_pair_failed == 1) {
3084 return(0);
3085 }
3086 }
3087 } else {
3088 /* normal accounting */
3089 num_missing++;
3090 }
3091 }
3092 if ((parity_type == '1') && (c%2 == 1)) {
3093 				/* Just finished the odd half of a pair, and
3094 				   we didn't bail.. reset the even_pair_failed
3095 				   flag, and go on to the next pair.... */
3096 even_pair_failed = 0;
3097 }
3098 }
3099 }
3100
3101 clabel = cset->ac->clabel;
3102
3103 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3104 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3105 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3106 /* XXX this needs to be made *much* more general */
3107 /* Too many failures */
3108 return(0);
3109 }
3110 /* otherwise, all is well, and we've got enough to take a kick
3111 at autoconfiguring this set */
3112 return(1);
3113 }
3114
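/*
 * Build an RF_Config_t for a set being auto-configured, using the
 * information from the component labels.
 */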
3115 void
3116 rf_create_configuration(ac,config,raidPtr)
3117 RF_AutoConfig_t *ac;
3118 RF_Config_t *config;
3119 RF_Raid_t *raidPtr;
3120 {
3121 RF_ComponentLabel_t *clabel;
3122 int i;
3123
3124 clabel = ac->clabel;
3125
3126 /* 1. Fill in the common stuff */
3127 config->numRow = clabel->num_rows;
3128 config->numCol = clabel->num_columns;
3129 config->numSpare = 0; /* XXX should this be set here? */
3130 config->sectPerSU = clabel->sectPerSU;
3131 config->SUsPerPU = clabel->SUsPerPU;
3132 config->SUsPerRU = clabel->SUsPerRU;
3133 config->parityConfig = clabel->parityConfig;
3134 /* XXX... */
3135 strcpy(config->diskQueueType,"fifo");
3136 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3137 config->layoutSpecificSize = 0; /* XXX ?? */
3138
3139 while(ac!=NULL) {
3140 		/* row/col values will be in range due to the checks
3141 		   in rf_reasonable_label() */
3142 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3143 ac->devname);
3144 ac = ac->next;
3145 }
3146
3147 for(i=0;i<RF_MAXDBGV;i++) {
3148 		config->debugVars[i][0] = '\0';
3149 }
3150 }
3151
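/*
 * Set the autoconfigure flag on the set and in the component label of
 * every optimal component.  Returns the new value.
 */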
3152 int
3153 rf_set_autoconfig(raidPtr, new_value)
3154 RF_Raid_t *raidPtr;
3155 int new_value;
3156 {
3157 RF_ComponentLabel_t clabel;
3158 struct vnode *vp;
3159 dev_t dev;
3160 int row, column;
3161
3162 raidPtr->autoconfigure = new_value;
3163 for(row=0; row<raidPtr->numRow; row++) {
3164 for(column=0; column<raidPtr->numCol; column++) {
3165 if (raidPtr->Disks[row][column].status ==
3166 rf_ds_optimal) {
3167 dev = raidPtr->Disks[row][column].dev;
3168 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3169 raidread_component_label(dev, vp, &clabel);
3170 clabel.autoconfigure = new_value;
3171 raidwrite_component_label(dev, vp, &clabel);
3172 }
3173 }
3174 }
3175 return(new_value);
3176 }
3177
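/*
 * Set the root_partition flag on the set and in the component label of
 * every optimal component.  Returns the new value.
 */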
3178 int
3179 rf_set_rootpartition(raidPtr, new_value)
3180 RF_Raid_t *raidPtr;
3181 int new_value;
3182 {
3183 RF_ComponentLabel_t clabel;
3184 struct vnode *vp;
3185 dev_t dev;
3186 int row, column;
3187
3188 raidPtr->root_partition = new_value;
3189 for(row=0; row<raidPtr->numRow; row++) {
3190 for(column=0; column<raidPtr->numCol; column++) {
3191 if (raidPtr->Disks[row][column].status ==
3192 rf_ds_optimal) {
3193 dev = raidPtr->Disks[row][column].dev;
3194 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3195 raidread_component_label(dev, vp, &clabel);
3196 clabel.root_partition = new_value;
3197 raidwrite_component_label(dev, vp, &clabel);
3198 }
3199 }
3200 }
3201 return(new_value);
3202 }
3203
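/*
 * Close and release the vnodes of all components in the given
 * configuration set.
 */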
3204 void
3205 rf_release_all_vps(cset)
3206 RF_ConfigSet_t *cset;
3207 {
3208 RF_AutoConfig_t *ac;
3209
3210 ac = cset->ac;
3211 while(ac!=NULL) {
3212 /* Close the vp, and give it back */
3213 if (ac->vp) {
3214 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3215 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3216 vput(ac->vp);
3217 ac->vp = NULL;
3218 }
3219 ac = ac->next;
3220 }
3221 }
3222
3223
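/*
 * Free the component labels, the RF_AutoConfig_t entries, and the
 * configuration set itself.
 */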
3224 void
3225 rf_cleanup_config_set(cset)
3226 RF_ConfigSet_t *cset;
3227 {
3228 RF_AutoConfig_t *ac;
3229 RF_AutoConfig_t *next_ac;
3230
3231 ac = cset->ac;
3232 while(ac!=NULL) {
3233 next_ac = ac->next;
3234 /* nuke the label */
3235 free(ac->clabel, M_RAIDFRAME);
3236 /* cleanup the config structure */
3237 free(ac, M_RAIDFRAME);
3238 /* "next.." */
3239 ac = next_ac;
3240 }
3241 /* and, finally, nuke the config set */
3242 free(cset, M_RAIDFRAME);
3243 }
3244
3245
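/*
 * Initialize a component label with the current parameters of the
 * RAID set.
 */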
3246 void
3247 raid_init_component_label(raidPtr, clabel)
3248 RF_Raid_t *raidPtr;
3249 RF_ComponentLabel_t *clabel;
3250 {
3251 /* current version number */
3252 clabel->version = RF_COMPONENT_LABEL_VERSION;
3253 clabel->serial_number = raidPtr->serial_number;
3254 clabel->mod_counter = raidPtr->mod_counter;
3255 clabel->num_rows = raidPtr->numRow;
3256 clabel->num_columns = raidPtr->numCol;
3257 clabel->clean = RF_RAID_DIRTY; /* not clean */
3258 clabel->status = rf_ds_optimal; /* "It's good!" */
3259
3260 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3261 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3262 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3263
3264 clabel->blockSize = raidPtr->bytesPerSector;
3265 clabel->numBlocks = raidPtr->sectorsPerDisk;
3266
3267 /* XXX not portable */
3268 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3269 clabel->maxOutstanding = raidPtr->maxOutstanding;
3270 clabel->autoconfigure = raidPtr->autoconfigure;
3271 clabel->root_partition = raidPtr->root_partition;
3272 clabel->last_unit = raidPtr->raidid;
3273 clabel->config_order = raidPtr->config_order;
3274 }
3275
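/*
 * Auto-configure the given configuration set: pick a RAID unit, build
 * a configuration, and configure it.  On success the chosen unit is
 * returned via *unit.
 */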
3276 int
3277 rf_auto_config_set(cset,unit)
3278 RF_ConfigSet_t *cset;
3279 int *unit;
3280 {
3281 RF_Raid_t *raidPtr;
3282 RF_Config_t *config;
3283 int raidID;
3284 int retcode;
3285
3286 printf("RAID autoconfigure\n");
3287
3288 retcode = 0;
3289 *unit = -1;
3290
3291 /* 1. Create a config structure */
3292
3293 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3294 M_RAIDFRAME,
3295 M_NOWAIT);
3296 if (config==NULL) {
3297 printf("Out of mem!?!?\n");
3298 /* XXX do something more intelligent here. */
3299 return(1);
3300 }
3301
3302 memset(config, 0, sizeof(RF_Config_t));
3303
3304 /* XXX raidID needs to be set correctly.. */
3305
3306 /*
3307 2. Figure out what RAID ID this one is supposed to live at
3308 See if we can get the same RAID dev that it was configured
3309 on last time..
3310 */
3311
3312 raidID = cset->ac->clabel->last_unit;
3313 if ((raidID < 0) || (raidID >= numraid)) {
3314 /* let's not wander off into lala land. */
3315 raidID = numraid - 1;
3316 }
3317 if (raidPtrs[raidID]->valid != 0) {
3318
3319 /*
3320 Nope... Go looking for an alternative...
3321 Start high so we don't immediately use raid0 if that's
3322 not taken.
3323 */
3324
3325 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3326 if (raidPtrs[raidID]->valid == 0) {
3327 /* can use this one! */
3328 break;
3329 }
3330 }
3331 }
3332
3333 if (raidID < 0) {
3334 /* punt... */
3335 printf("Unable to auto configure this set!\n");
3336 printf("(Out of RAID devs!)\n");
3337 return(1);
3338 }
3339 printf("Configuring raid%d:\n",raidID);
3340 raidPtr = raidPtrs[raidID];
3341
3342 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3343 raidPtr->raidid = raidID;
3344 raidPtr->openings = RAIDOUTSTANDING;
3345
3346 /* 3. Build the configuration structure */
3347 rf_create_configuration(cset->ac, config, raidPtr);
3348
3349 /* 4. Do the configuration */
3350 retcode = rf_Configure(raidPtr, config, cset->ac);
3351
3352 if (retcode == 0) {
3353
3354 raidinit(raidPtrs[raidID]);
3355
3356 rf_markalldirty(raidPtrs[raidID]);
3357 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3358 if (cset->ac->clabel->root_partition==1) {
3359 /* everything configured just fine. Make a note
3360 that this set is eligible to be root. */
3361 cset->rootable = 1;
3362 /* XXX do this here? */
3363 raidPtrs[raidID]->root_partition = 1;
3364 }
3365 }
3366
3367 /* 5. Cleanup */
3368 free(config, M_RAIDFRAME);
3369
3370 *unit = raidID;
3371 return(retcode);
3372 }
3373
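/*
 * Note the completion of an I/O in the disk statistics for this unit.
 */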
3374 void
3375 rf_disk_unbusy(desc)
3376 RF_RaidAccessDesc_t *desc;
3377 {
3378 struct buf *bp;
3379
3380 bp = (struct buf *)desc->bp;
3381 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3382 (bp->b_bcount - bp->b_resid));
3383 }
3384