1 /*	$NetBSD: rf_netbsdkintf.c,v 1.106.2.5 2002/09/06 08:46:05 jdolecek Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/cdefs.h>
117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.106.2.5 2002/09/06 08:46:05 jdolecek Exp $");
118
119 #include <sys/param.h>
120 #include <sys/errno.h>
121 #include <sys/pool.h>
122 #include <sys/queue.h>
123 #include <sys/disk.h>
124 #include <sys/device.h>
125 #include <sys/stat.h>
126 #include <sys/ioctl.h>
127 #include <sys/fcntl.h>
128 #include <sys/systm.h>
129 #include <sys/namei.h>
130 #include <sys/vnode.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include <dev/raidframe/raidframevar.h>
139 #include <dev/raidframe/raidframeio.h>
140 #include "raid.h"
141 #include "opt_raid_autoconfig.h"
142 #include "rf_raid.h"
143 #include "rf_copyback.h"
144 #include "rf_dag.h"
145 #include "rf_dagflags.h"
146 #include "rf_desc.h"
147 #include "rf_diskqueue.h"
148 #include "rf_etimer.h"
149 #include "rf_general.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_threadstuff.h"
155
156 int rf_kdebug_level = 0;
157
158 #ifdef DEBUG
159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
160 #else /* DEBUG */
161 #define db1_printf(a) { }
162 #endif /* DEBUG */
163
164 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
165
166 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
167
168 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
169 * spare table */
170 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
171 * installation process */
172
173 /* prototypes */
174 static void KernelWakeupFunc(struct buf * bp);
175 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
176 dev_t dev, RF_SectorNum_t startSect,
177 RF_SectorCount_t numSect, caddr_t buf,
178 void (*cbFunc) (struct buf *), void *cbArg,
179 int logBytesPerSector, struct proc * b_proc);
180 static void raidinit(RF_Raid_t *);
181
182 void raidattach(int);
183 int raidsize(dev_t);
184 int raidopen(dev_t, int, int, struct proc *);
185 int raidclose(dev_t, int, int, struct proc *);
186 int raidioctl(dev_t, u_long, caddr_t, int, struct proc *);
187 int raidwrite(dev_t, struct uio *, int);
188 int raidread(dev_t, struct uio *, int);
189 void raidstrategy(struct buf *);
190 int raiddump(dev_t, daddr_t, caddr_t, size_t);
191
192 /*
193 * Pilfered from ccd.c
194 */
195
196 struct raidbuf {
197 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
198 struct buf *rf_obp; /* ptr. to original I/O buf */
199 int rf_flags; /* misc. flags */
200 RF_DiskQueueData_t *req;/* the request that this was part of.. */
201 };
202
203 /* component buffer pool */
204 struct pool raidframe_cbufpool;
205
206 #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
207 #define RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
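/*
 * Lifecycle sketch (see rf_DispatchKernelIO() and KernelWakeupFunc()
 * below): each component I/O borrows a struct raidbuf from the pool
 * via RAIDGETBUF(), its embedded rf_buf is handed to VOP_STRATEGY(),
 * and the completion callback returns it with RAIDPUTBUF().  Since
 * pool_get() is called with PR_NOWAIT, RAIDGETBUF() can return NULL
 * under memory pressure.
 */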
208
209 /* XXX Not sure if the following should be replacing the raidPtrs above,
210 or if it should be used in conjunction with that...
211 */
212
213 struct raid_softc {
214 int sc_flags; /* flags */
215 int sc_cflags; /* configuration flags */
216 size_t sc_size; /* size of the raid device */
217 char sc_xname[20]; /* XXX external name */
218 struct disk sc_dkdev; /* generic disk device info */
219 struct bufq_state buf_queue; /* used for the device queue */
220 };
221 /* sc_flags */
222 #define RAIDF_INITED 0x01 /* unit has been initialized */
223 #define RAIDF_WLABEL 0x02 /* label area is writable */
224 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
225 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
226 #define RAIDF_LOCKED 0x80 /* unit is locked */
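/*
 * RAIDF_LOCKED and RAIDF_WANTED are used by the ccd(4)-style helpers
 * raidlock()/raidunlock() (declared below) to serialize open, close,
 * ioctl and labelling against each other: a caller that finds the
 * unit RAIDF_LOCKED marks it RAIDF_WANTED and sleeps until the holder
 * clears the lock and issues a wakeup().
 */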
227
228 #define raidunit(x) DISKUNIT(x)
229 int numraid = 0;
230
231 /*
232 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
233 * Be aware that large numbers can allow the driver to consume a lot of
234 * kernel memory, especially on writes, and in degraded mode reads.
235 *
236 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
237 * a single 64K write will typically require 64K for the old data,
238 * 64K for the old parity, and 64K for the new parity, for a total
239 * of 192K (if the parity buffer is not re-used immediately).
240 * Even if it is used immediately, that's still 128K, which when multiplied
241 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
242 *
243 * Now in degraded mode, for example, a 64K read on the above setup may
244 * require data reconstruction, which will require *all* of the 4 remaining
245 * disks to participate -- 4 * 32K/disk == 128K again.
246 */
247
248 #ifndef RAIDOUTSTANDING
249 #define RAIDOUTSTANDING 6
250 #endif
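/*
 * (The #ifndef above means an alternative default can be supplied at
 * build time, e.g. by defining RAIDOUTSTANDING on the compiler
 * command line.)
 */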
251
252 #define RAIDLABELDEV(dev) \
253 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
254
255 /* declared here, and made public, for the benefit of KVM stuff.. */
256 struct raid_softc *raid_softc;
257
258 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
259 struct disklabel *);
260 static void raidgetdisklabel(dev_t);
261 static void raidmakedisklabel(struct raid_softc *);
262
263 static int raidlock(struct raid_softc *);
264 static void raidunlock(struct raid_softc *);
265
266 static void rf_markalldirty(RF_Raid_t *);
267 void rf_mountroot_hook(struct device *);
268
269 struct device *raidrootdev;
270
271 void rf_ReconThread(struct rf_recon_req *);
272 /* XXX what I want is: */
273 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
274 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
275 void rf_CopybackThread(RF_Raid_t *raidPtr);
276 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
277 void rf_buildroothack(void *);
278
279 RF_AutoConfig_t *rf_find_raid_components(void);
280 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
281 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
282 static int rf_reasonable_label(RF_ComponentLabel_t *);
283 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
284 int rf_set_autoconfig(RF_Raid_t *, int);
285 int rf_set_rootpartition(RF_Raid_t *, int);
286 void rf_release_all_vps(RF_ConfigSet_t *);
287 void rf_cleanup_config_set(RF_ConfigSet_t *);
288 int rf_have_enough_components(RF_ConfigSet_t *);
289 int rf_auto_config_set(RF_ConfigSet_t *, int *);
290
291 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
292 allow autoconfig to take place.
293 Note that this is overridden by having
294 RAID_AUTOCONFIG as an option in the
295 kernel config file. */
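/*
 * Illustrative kernel configuration fragment for the above:
 *
 *	pseudo-device	raid	8	# RAIDframe disk driver
 *	options 	RAID_AUTOCONFIG	# auto-configure RAID sets at boot
 */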
296
297 void
298 raidattach(num)
299 int num;
300 {
301 int raidID;
302 int i, rc;
303 RF_AutoConfig_t *ac_list; /* autoconfig list */
304 RF_ConfigSet_t *config_sets;
305
306 #ifdef DEBUG
307 printf("raidattach: Asked for %d units\n", num);
308 #endif
309
310 if (num <= 0) {
311 #ifdef DIAGNOSTIC
312 panic("raidattach: count <= 0");
313 #endif
314 return;
315 }
316 /* This is where all the initialization stuff gets done. */
317
318 numraid = num;
319
320 /* Make some space for requested number of units... */
321
322 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
323 if (raidPtrs == NULL) {
324 panic("raidPtrs is NULL!!\n");
325 }
326
327 /* Initialize the component buffer pool. */
328 pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
329 0, 0, "raidpl", NULL);
330
331 rc = rf_mutex_init(&rf_sparet_wait_mutex);
332 if (rc) {
333 RF_PANIC();
334 }
335
336 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
337
338 for (i = 0; i < num; i++)
339 raidPtrs[i] = NULL;
340 rc = rf_BootRaidframe();
341 if (rc == 0)
342 printf("Kernelized RAIDframe activated\n");
343 else
344 panic("Serious error booting RAID!!\n");
345
346 /* put together some datastructures like the CCD device does.. This
347 * lets us lock the device and what-not when it gets opened. */
348
349 raid_softc = (struct raid_softc *)
350 malloc(num * sizeof(struct raid_softc),
351 M_RAIDFRAME, M_NOWAIT);
352 if (raid_softc == NULL) {
353 printf("WARNING: no memory for RAIDframe driver\n");
354 return;
355 }
356
357 memset(raid_softc, 0, num * sizeof(struct raid_softc));
358
359 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
360 M_RAIDFRAME, M_NOWAIT);
361 if (raidrootdev == NULL) {
362 panic("No memory for RAIDframe driver!!?!?!\n");
363 }
364
365 for (raidID = 0; raidID < num; raidID++) {
366 bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
367
368 raidrootdev[raidID].dv_class = DV_DISK;
369 raidrootdev[raidID].dv_cfdata = NULL;
370 raidrootdev[raidID].dv_unit = raidID;
371 raidrootdev[raidID].dv_parent = NULL;
372 raidrootdev[raidID].dv_flags = 0;
373 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
374
375 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
376 (RF_Raid_t *));
377 if (raidPtrs[raidID] == NULL) {
378 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
379 numraid = raidID;
380 return;
381 }
382 }
383
384 #ifdef RAID_AUTOCONFIG
385 raidautoconfig = 1;
386 #endif
387
388 if (raidautoconfig) {
389 /* 1. locate all RAID components on the system */
390
391 #if DEBUG
392 printf("Searching for raid components...\n");
393 #endif
394 ac_list = rf_find_raid_components();
395
396 /* 2. sort them into their respective sets */
397
398 config_sets = rf_create_auto_sets(ac_list);
399
400 /* 3. evaluate each set and configure the valid ones
401 This gets done in rf_buildroothack() */
402
403 /* schedule the creation of the thread to do the
404 "/ on RAID" stuff */
405
406 kthread_create(rf_buildroothack,config_sets);
407
408 #if 0
409 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
410 #endif
411 }
412
413 }
414
415 void
416 rf_buildroothack(arg)
417 void *arg;
418 {
419 RF_ConfigSet_t *config_sets = arg;
420 RF_ConfigSet_t *cset;
421 RF_ConfigSet_t *next_cset;
422 int retcode;
423 int raidID;
424 int rootID;
425 int num_root;
426
427 rootID = 0;
428 num_root = 0;
429 cset = config_sets;
430 while(cset != NULL ) {
431 next_cset = cset->next;
432 if (rf_have_enough_components(cset) &&
433 cset->ac->clabel->autoconfigure==1) {
434 retcode = rf_auto_config_set(cset,&raidID);
435 if (!retcode) {
436 if (cset->rootable) {
437 rootID = raidID;
438 num_root++;
439 }
440 } else {
441 /* The autoconfig didn't work :( */
442 #if DEBUG
443 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
444 #endif
445 rf_release_all_vps(cset);
446 }
447 } else {
448 /* we're not autoconfiguring this set...
449 release the associated resources */
450 rf_release_all_vps(cset);
451 }
452 /* cleanup */
453 rf_cleanup_config_set(cset);
454 cset = next_cset;
455 }
456
457 /* we found something bootable... */
458
459 if (num_root == 1) {
460 booted_device = &raidrootdev[rootID];
461 } else if (num_root > 1) {
462 /* we can't guess.. require the user to answer... */
463 boothowto |= RB_ASKNAME;
464 }
465 }
466
467
468 int
469 raidsize(dev)
470 dev_t dev;
471 {
472 struct raid_softc *rs;
473 struct disklabel *lp;
474 int part, unit, omask, size;
475
476 unit = raidunit(dev);
477 if (unit >= numraid)
478 return (-1);
479 rs = &raid_softc[unit];
480
481 if ((rs->sc_flags & RAIDF_INITED) == 0)
482 return (-1);
483
484 part = DISKPART(dev);
485 omask = rs->sc_dkdev.dk_openmask & (1 << part);
486 lp = rs->sc_dkdev.dk_label;
487
488 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
489 return (-1);
490
491 if (lp->d_partitions[part].p_fstype != FS_SWAP)
492 size = -1;
493 else
494 size = lp->d_partitions[part].p_size *
495 (lp->d_secsize / DEV_BSIZE);
496
497 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
498 return (-1);
499
500 return (size);
501
502 }
503
504 int
505 raiddump(dev, blkno, va, size)
506 dev_t dev;
507 daddr_t blkno;
508 caddr_t va;
509 size_t size;
510 {
511 /* Not implemented. */
512 return ENXIO;
513 }
514 /* ARGSUSED */
515 int
516 raidopen(dev, flags, fmt, p)
517 dev_t dev;
518 int flags, fmt;
519 struct proc *p;
520 {
521 int unit = raidunit(dev);
522 struct raid_softc *rs;
523 struct disklabel *lp;
524 int part, pmask;
525 int error = 0;
526
527 if (unit >= numraid)
528 return (ENXIO);
529 rs = &raid_softc[unit];
530
531 if ((error = raidlock(rs)) != 0)
532 return (error);
533 lp = rs->sc_dkdev.dk_label;
534
535 part = DISKPART(dev);
536 pmask = (1 << part);
537
538 db1_printf(("Opening raid device number: %d partition: %d\n",
539 unit, part));
540
541
542 if ((rs->sc_flags & RAIDF_INITED) &&
543 (rs->sc_dkdev.dk_openmask == 0))
544 raidgetdisklabel(dev);
545
546 /* make sure that this partition exists */
547
548 if (part != RAW_PART) {
549 db1_printf(("Not a raw partition..\n"));
550 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
551 ((part >= lp->d_npartitions) ||
552 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
553 error = ENXIO;
554 raidunlock(rs);
555 db1_printf(("Bailing out...\n"));
556 return (error);
557 }
558 }
559 /* Prevent this unit from being unconfigured while open. */
560 switch (fmt) {
561 case S_IFCHR:
562 rs->sc_dkdev.dk_copenmask |= pmask;
563 break;
564
565 case S_IFBLK:
566 rs->sc_dkdev.dk_bopenmask |= pmask;
567 break;
568 }
569
570 if ((rs->sc_dkdev.dk_openmask == 0) &&
571 ((rs->sc_flags & RAIDF_INITED) != 0)) {
572 /* First one... mark things as dirty... Note that we *MUST*
573 have done a configure before this. I DO NOT WANT TO BE
574 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
575 THAT THEY BELONG TOGETHER!!!!! */
576 /* XXX should check to see if we're only open for reading
577 here... If so, we needn't do this, but then need some
578 other way of keeping track of what's happened.. */
579
580 rf_markalldirty( raidPtrs[unit] );
581 }
582
583
584 rs->sc_dkdev.dk_openmask =
585 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
586
587 raidunlock(rs);
588
589 return (error);
590
591
592 }
593 /* ARGSUSED */
594 int
595 raidclose(dev, flags, fmt, p)
596 dev_t dev;
597 int flags, fmt;
598 struct proc *p;
599 {
600 int unit = raidunit(dev);
601 struct raid_softc *rs;
602 int error = 0;
603 int part;
604
605 if (unit >= numraid)
606 return (ENXIO);
607 rs = &raid_softc[unit];
608
609 if ((error = raidlock(rs)) != 0)
610 return (error);
611
612 part = DISKPART(dev);
613
614 /* ...that much closer to allowing unconfiguration... */
615 switch (fmt) {
616 case S_IFCHR:
617 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
618 break;
619
620 case S_IFBLK:
621 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
622 break;
623 }
624 rs->sc_dkdev.dk_openmask =
625 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
626
627 if ((rs->sc_dkdev.dk_openmask == 0) &&
628 ((rs->sc_flags & RAIDF_INITED) != 0)) {
629 /* Last one... device is not unconfigured yet.
630 If RAIDF_INITED is not set, device shutdown has already
631 taken care of setting the clean bits; otherwise
632 mark things as clean here... */
633 #if 0
634 printf("Last one on raid%d. Updating status.\n",unit);
635 #endif
636 rf_update_component_labels(raidPtrs[unit],
637 RF_FINAL_COMPONENT_UPDATE);
638 if (doing_shutdown) {
639 /* last one, and we're going down, so
640 lights out for this RAID set too. */
641 error = rf_Shutdown(raidPtrs[unit]);
642
643 /* It's no longer initialized... */
644 rs->sc_flags &= ~RAIDF_INITED;
645
646 /* Detach the disk. */
647 disk_detach(&rs->sc_dkdev);
648 }
649 }
650
651 raidunlock(rs);
652 return (0);
653
654 }
655
656 void
657 raidstrategy(bp)
658 struct buf *bp;
659 {
660 int s;
661
662 unsigned int raidID = raidunit(bp->b_dev);
663 RF_Raid_t *raidPtr;
664 struct raid_softc *rs = &raid_softc[raidID];
665 struct disklabel *lp;
666 int wlabel;
667
668 if ((rs->sc_flags & RAIDF_INITED) ==0) {
669 bp->b_error = ENXIO;
670 bp->b_flags |= B_ERROR;
671 bp->b_resid = bp->b_bcount;
672 biodone(bp);
673 return;
674 }
675 if (raidID >= numraid || !raidPtrs[raidID]) {
676 bp->b_error = ENODEV;
677 bp->b_flags |= B_ERROR;
678 bp->b_resid = bp->b_bcount;
679 biodone(bp);
680 return;
681 }
682 raidPtr = raidPtrs[raidID];
683 if (!raidPtr->valid) {
684 bp->b_error = ENODEV;
685 bp->b_flags |= B_ERROR;
686 bp->b_resid = bp->b_bcount;
687 biodone(bp);
688 return;
689 }
690 if (bp->b_bcount == 0) {
691 db1_printf(("b_bcount is zero..\n"));
692 biodone(bp);
693 return;
694 }
695 lp = rs->sc_dkdev.dk_label;
696
697 /*
698 * Do bounds checking and adjust transfer. If there's an
699 * error, the bounds check will flag that for us.
700 */
701
702 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
703 if (DISKPART(bp->b_dev) != RAW_PART)
704 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
705 db1_printf(("Bounds check failed!!:%d %d\n",
706 (int) bp->b_blkno, (int) wlabel));
707 biodone(bp);
708 return;
709 }
710 s = splbio();
711
712 bp->b_resid = 0;
713
714 /* stuff it onto our queue */
715 BUFQ_PUT(&rs->buf_queue, bp);
716
717 raidstart(raidPtrs[raidID]);
718
719 splx(s);
720 }
721 /* ARGSUSED */
722 int
723 raidread(dev, uio, flags)
724 dev_t dev;
725 struct uio *uio;
726 int flags;
727 {
728 int unit = raidunit(dev);
729 struct raid_softc *rs;
730 int part;
731
732 if (unit >= numraid)
733 return (ENXIO);
734 rs = &raid_softc[unit];
735
736 if ((rs->sc_flags & RAIDF_INITED) == 0)
737 return (ENXIO);
738 part = DISKPART(dev);
739
740 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
741
742 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
743
744 }
745 /* ARGSUSED */
746 int
747 raidwrite(dev, uio, flags)
748 dev_t dev;
749 struct uio *uio;
750 int flags;
751 {
752 int unit = raidunit(dev);
753 struct raid_softc *rs;
754
755 if (unit >= numraid)
756 return (ENXIO);
757 rs = &raid_softc[unit];
758
759 if ((rs->sc_flags & RAIDF_INITED) == 0)
760 return (ENXIO);
761 db1_printf(("raidwrite\n"));
762 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
763
764 }
765
766 int
767 raidioctl(dev, cmd, data, flag, p)
768 dev_t dev;
769 u_long cmd;
770 caddr_t data;
771 int flag;
772 struct proc *p;
773 {
774 int unit = raidunit(dev);
775 int error = 0;
776 int part, pmask;
777 struct raid_softc *rs;
778 RF_Config_t *k_cfg, *u_cfg;
779 RF_Raid_t *raidPtr;
780 RF_RaidDisk_t *diskPtr;
781 RF_AccTotals_t *totals;
782 RF_DeviceConfig_t *d_cfg, **ucfgp;
783 u_char *specific_buf;
784 int retcode = 0;
785 int row;
786 int column;
787 int raidid;
788 struct rf_recon_req *rrcopy, *rr;
789 RF_ComponentLabel_t *clabel;
790 RF_ComponentLabel_t ci_label;
791 RF_ComponentLabel_t **clabel_ptr;
792 RF_SingleComponent_t *sparePtr,*componentPtr;
793 RF_SingleComponent_t hot_spare;
794 RF_SingleComponent_t component;
795 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
796 int i, j, d;
797 #ifdef __HAVE_OLD_DISKLABEL
798 struct disklabel newlabel;
799 #endif
800
801 if (unit >= numraid)
802 return (ENXIO);
803 rs = &raid_softc[unit];
804 raidPtr = raidPtrs[unit];
805
806 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
807 (int) DISKPART(dev), (int) unit, (int) cmd));
808
809 /* Must be open for writes for these commands... */
810 switch (cmd) {
811 case DIOCSDINFO:
812 case DIOCWDINFO:
813 #ifdef __HAVE_OLD_DISKLABEL
814 case ODIOCWDINFO:
815 case ODIOCSDINFO:
816 #endif
817 case DIOCWLABEL:
818 if ((flag & FWRITE) == 0)
819 return (EBADF);
820 }
821
822 /* Must be initialized for these... */
823 switch (cmd) {
824 case DIOCGDINFO:
825 case DIOCSDINFO:
826 case DIOCWDINFO:
827 #ifdef __HAVE_OLD_DISKLABEL
828 case ODIOCGDINFO:
829 case ODIOCWDINFO:
830 case ODIOCSDINFO:
831 case ODIOCGDEFLABEL:
832 #endif
833 case DIOCGPART:
834 case DIOCWLABEL:
835 case DIOCGDEFLABEL:
836 case RAIDFRAME_SHUTDOWN:
837 case RAIDFRAME_REWRITEPARITY:
838 case RAIDFRAME_GET_INFO:
839 case RAIDFRAME_RESET_ACCTOTALS:
840 case RAIDFRAME_GET_ACCTOTALS:
841 case RAIDFRAME_KEEP_ACCTOTALS:
842 case RAIDFRAME_GET_SIZE:
843 case RAIDFRAME_FAIL_DISK:
844 case RAIDFRAME_COPYBACK:
845 case RAIDFRAME_CHECK_RECON_STATUS:
846 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
847 case RAIDFRAME_GET_COMPONENT_LABEL:
848 case RAIDFRAME_SET_COMPONENT_LABEL:
849 case RAIDFRAME_ADD_HOT_SPARE:
850 case RAIDFRAME_REMOVE_HOT_SPARE:
851 case RAIDFRAME_INIT_LABELS:
852 case RAIDFRAME_REBUILD_IN_PLACE:
853 case RAIDFRAME_CHECK_PARITY:
854 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
855 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
856 case RAIDFRAME_CHECK_COPYBACK_STATUS:
857 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
858 case RAIDFRAME_SET_AUTOCONFIG:
859 case RAIDFRAME_SET_ROOT:
860 case RAIDFRAME_DELETE_COMPONENT:
861 case RAIDFRAME_INCORPORATE_HOT_SPARE:
862 if ((rs->sc_flags & RAIDF_INITED) == 0)
863 return (ENXIO);
864 }
865
866 switch (cmd) {
867
868 /* configure the system */
869 case RAIDFRAME_CONFIGURE:
870
871 if (raidPtr->valid) {
872 /* There is a valid RAID set running on this unit! */
873 printf("raid%d: Device already configured!\n",unit);
874 return(EINVAL);
875 }
876
877 /* copy-in the configuration information */
878 /* data points to a pointer to the configuration structure */
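/* Illustrative userland call (cf. raidctl(8)); note the extra level
 * of indirection -- the ioctl argument carries a pointer to the
 * RF_Config_t, not the structure itself:
 *
 *	RF_Config_t cfg, *cfgp = &cfg;
 *	(fill in cfg...)
 *	ioctl(fd, RAIDFRAME_CONFIGURE, &cfgp);
 */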
879
880 u_cfg = *((RF_Config_t **) data);
881 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
882 if (k_cfg == NULL) {
883 return (ENOMEM);
884 }
885 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
886 sizeof(RF_Config_t));
887 if (retcode) {
888 RF_Free(k_cfg, sizeof(RF_Config_t));
889 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
890 retcode));
891 return (retcode);
892 }
893 /* allocate a buffer for the layout-specific data, and copy it
894 * in */
895 if (k_cfg->layoutSpecificSize) {
896 if (k_cfg->layoutSpecificSize > 10000) {
897 /* sanity check */
898 RF_Free(k_cfg, sizeof(RF_Config_t));
899 return (EINVAL);
900 }
901 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
902 (u_char *));
903 if (specific_buf == NULL) {
904 RF_Free(k_cfg, sizeof(RF_Config_t));
905 return (ENOMEM);
906 }
907 retcode = copyin(k_cfg->layoutSpecific,
908 (caddr_t) specific_buf,
909 k_cfg->layoutSpecificSize);
910 if (retcode) {
911 RF_Free(k_cfg, sizeof(RF_Config_t));
912 RF_Free(specific_buf,
913 k_cfg->layoutSpecificSize);
914 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
915 retcode));
916 return (retcode);
917 }
918 } else
919 specific_buf = NULL;
920 k_cfg->layoutSpecific = specific_buf;
921
922 /* should do some kind of sanity check on the configuration.
923 * Store the sum of all the bytes in the last byte? */
924
925 /* configure the system */
926
927 /*
928 * Clear the entire RAID descriptor, just to make sure
929 * there is no stale data left in the case of a
930 * reconfiguration
931 */
932 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
933 raidPtr->raidid = unit;
934
935 retcode = rf_Configure(raidPtr, k_cfg, NULL);
936
937 if (retcode == 0) {
938
939 /* allow this many simultaneous IO's to
940 this RAID device */
941 raidPtr->openings = RAIDOUTSTANDING;
942
943 raidinit(raidPtr);
944 rf_markalldirty(raidPtr);
945 }
946 /* free the buffers. No return code here. */
947 if (k_cfg->layoutSpecificSize) {
948 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
949 }
950 RF_Free(k_cfg, sizeof(RF_Config_t));
951
952 return (retcode);
953
954 /* shutdown the system */
955 case RAIDFRAME_SHUTDOWN:
956
957 if ((error = raidlock(rs)) != 0)
958 return (error);
959
960 /*
961 * If somebody has a partition mounted, we shouldn't
962 * shutdown.
963 */
964
965 part = DISKPART(dev);
966 pmask = (1 << part);
967 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
968 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
969 (rs->sc_dkdev.dk_copenmask & pmask))) {
970 raidunlock(rs);
971 return (EBUSY);
972 }
973
974 retcode = rf_Shutdown(raidPtr);
975
976 /* It's no longer initialized... */
977 rs->sc_flags &= ~RAIDF_INITED;
978
979 /* Detach the disk. */
980 disk_detach(&rs->sc_dkdev);
981
982 raidunlock(rs);
983
984 return (retcode);
985 case RAIDFRAME_GET_COMPONENT_LABEL:
986 clabel_ptr = (RF_ComponentLabel_t **) data;
987 /* need to read the component label for the disk indicated
988 by row,column in clabel */
989
990 /* For practice, let's get it directly from disk, rather
991 than from the in-core copy */
992 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
993 (RF_ComponentLabel_t *));
994 if (clabel == NULL)
995 return (ENOMEM);
996
997 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
998
999 retcode = copyin( *clabel_ptr, clabel,
1000 sizeof(RF_ComponentLabel_t));
1001
1002 if (retcode) {
1003 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1004 return(retcode);
1005 }
1006
1007 row = clabel->row;
1008 column = clabel->column;
1009
1010 if ((row < 0) || (row >= raidPtr->numRow) ||
1011 (column < 0) || (column >= raidPtr->numCol +
1012 raidPtr->numSpare)) {
1013 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1014 return(EINVAL);
1015 }
1016
1017 raidread_component_label(raidPtr->Disks[row][column].dev,
1018 raidPtr->raid_cinfo[row][column].ci_vp,
1019 clabel );
1020
1021 retcode = copyout((caddr_t) clabel,
1022 (caddr_t) *clabel_ptr,
1023 sizeof(RF_ComponentLabel_t));
1024 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1025 return (retcode);
1026
1027 case RAIDFRAME_SET_COMPONENT_LABEL:
1028 clabel = (RF_ComponentLabel_t *) data;
1029
1030 /* XXX check the label for valid stuff... */
1031 /* Note that some things *should not* get modified --
1032 the user should be re-initing the labels instead of
1033 trying to patch things.
1034 */
1035
1036 raidid = raidPtr->raidid;
1037 printf("raid%d: Got component label:\n", raidid);
1038 printf("raid%d: Version: %d\n", raidid, clabel->version);
1039 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1040 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1041 printf("raid%d: Row: %d\n", raidid, clabel->row);
1042 printf("raid%d: Column: %d\n", raidid, clabel->column);
1043 printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
1044 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1045 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1046 printf("raid%d: Status: %d\n", raidid, clabel->status);
1047
1048 row = clabel->row;
1049 column = clabel->column;
1050
1051 if ((row < 0) || (row >= raidPtr->numRow) ||
1052 (column < 0) || (column >= raidPtr->numCol)) {
1053 return(EINVAL);
1054 }
1055
1056 /* XXX this isn't allowed to do anything for now :-) */
1057
1058 /* XXX and before it is, we need to fill in the rest
1059 of the fields!?!?!?! */
1060 #if 0
1061 raidwrite_component_label(
1062 raidPtr->Disks[row][column].dev,
1063 raidPtr->raid_cinfo[row][column].ci_vp,
1064 clabel );
1065 #endif
1066 return (0);
1067
1068 case RAIDFRAME_INIT_LABELS:
1069 clabel = (RF_ComponentLabel_t *) data;
1070 /*
1071 we only want the serial number from
1072 the above. We get all the rest of the information
1073 from the config that was used to create this RAID
1074 set.
1075 */
1076
1077 raidPtr->serial_number = clabel->serial_number;
1078
1079 raid_init_component_label(raidPtr, &ci_label);
1080 ci_label.serial_number = clabel->serial_number;
1081
1082 for(row=0;row<raidPtr->numRow;row++) {
1083 ci_label.row = row;
1084 for(column=0;column<raidPtr->numCol;column++) {
1085 diskPtr = &raidPtr->Disks[row][column];
1086 if (!RF_DEAD_DISK(diskPtr->status)) {
1087 ci_label.partitionSize = diskPtr->partitionSize;
1088 ci_label.column = column;
1089 raidwrite_component_label(
1090 raidPtr->Disks[row][column].dev,
1091 raidPtr->raid_cinfo[row][column].ci_vp,
1092 &ci_label );
1093 }
1094 }
1095 }
1096
1097 return (retcode);
1098 case RAIDFRAME_SET_AUTOCONFIG:
1099 d = rf_set_autoconfig(raidPtr, *(int *) data);
1100 printf("raid%d: New autoconfig value is: %d\n",
1101 raidPtr->raidid, d);
1102 *(int *) data = d;
1103 return (retcode);
1104
1105 case RAIDFRAME_SET_ROOT:
1106 d = rf_set_rootpartition(raidPtr, *(int *) data);
1107 printf("raid%d: New rootpartition value is: %d\n",
1108 raidPtr->raidid, d);
1109 *(int *) data = d;
1110 return (retcode);
1111
1112 /* initialize all parity */
1113 case RAIDFRAME_REWRITEPARITY:
1114
1115 if (raidPtr->Layout.map->faultsTolerated == 0) {
1116 /* Parity for RAID 0 is trivially correct */
1117 raidPtr->parity_good = RF_RAID_CLEAN;
1118 return(0);
1119 }
1120
1121 if (raidPtr->parity_rewrite_in_progress == 1) {
1122 /* Re-write is already in progress! */
1123 return(EINVAL);
1124 }
1125
1126 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1127 rf_RewriteParityThread,
1128 raidPtr,"raid_parity");
1129 return (retcode);
1130
1131
1132 case RAIDFRAME_ADD_HOT_SPARE:
1133 sparePtr = (RF_SingleComponent_t *) data;
1134 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1135 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1136 return(retcode);
1137
1138 case RAIDFRAME_REMOVE_HOT_SPARE:
1139 return(retcode);
1140
1141 case RAIDFRAME_DELETE_COMPONENT:
1142 componentPtr = (RF_SingleComponent_t *)data;
1143 memcpy( &component, componentPtr,
1144 sizeof(RF_SingleComponent_t));
1145 retcode = rf_delete_component(raidPtr, &component);
1146 return(retcode);
1147
1148 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1149 componentPtr = (RF_SingleComponent_t *)data;
1150 memcpy( &component, componentPtr,
1151 sizeof(RF_SingleComponent_t));
1152 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1153 return(retcode);
1154
1155 case RAIDFRAME_REBUILD_IN_PLACE:
1156
1157 if (raidPtr->Layout.map->faultsTolerated == 0) {
1158 /* Can't do this on a RAID 0!! */
1159 return(EINVAL);
1160 }
1161
1162 if (raidPtr->recon_in_progress == 1) {
1163 /* a reconstruct is already in progress! */
1164 return(EINVAL);
1165 }
1166
1167 componentPtr = (RF_SingleComponent_t *) data;
1168 memcpy( &component, componentPtr,
1169 sizeof(RF_SingleComponent_t));
1170 row = component.row;
1171 column = component.column;
1172 printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
1173 row, column);
1174 if ((row < 0) || (row >= raidPtr->numRow) ||
1175 (column < 0) || (column >= raidPtr->numCol)) {
1176 return(EINVAL);
1177 }
1178
1179 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1180 if (rrcopy == NULL)
1181 return(ENOMEM);
1182
1183 rrcopy->raidPtr = (void *) raidPtr;
1184 rrcopy->row = row;
1185 rrcopy->col = column;
1186
1187 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1188 rf_ReconstructInPlaceThread,
1189 rrcopy,"raid_reconip");
1190 return(retcode);
1191
1192 case RAIDFRAME_GET_INFO:
1193 if (!raidPtr->valid)
1194 return (ENODEV);
1195 ucfgp = (RF_DeviceConfig_t **) data;
1196 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1197 (RF_DeviceConfig_t *));
1198 if (d_cfg == NULL)
1199 return (ENOMEM);
1200 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1201 d_cfg->rows = raidPtr->numRow;
1202 d_cfg->cols = raidPtr->numCol;
1203 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1204 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1205 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1206 return (ENOMEM);
1207 }
1208 d_cfg->nspares = raidPtr->numSpare;
1209 if (d_cfg->nspares >= RF_MAX_DISKS) {
1210 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1211 return (ENOMEM);
1212 }
1213 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1214 d = 0;
1215 for (i = 0; i < d_cfg->rows; i++) {
1216 for (j = 0; j < d_cfg->cols; j++) {
1217 d_cfg->devs[d] = raidPtr->Disks[i][j];
1218 d++;
1219 }
1220 }
1221 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1222 d_cfg->spares[i] = raidPtr->Disks[0][j];
1223 }
1224 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1225 sizeof(RF_DeviceConfig_t));
1226 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1227
1228 return (retcode);
1229
1230 case RAIDFRAME_CHECK_PARITY:
1231 *(int *) data = raidPtr->parity_good;
1232 return (0);
1233
1234 case RAIDFRAME_RESET_ACCTOTALS:
1235 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1236 return (0);
1237
1238 case RAIDFRAME_GET_ACCTOTALS:
1239 totals = (RF_AccTotals_t *) data;
1240 *totals = raidPtr->acc_totals;
1241 return (0);
1242
1243 case RAIDFRAME_KEEP_ACCTOTALS:
1244 raidPtr->keep_acc_totals = *(int *)data;
1245 return (0);
1246
1247 case RAIDFRAME_GET_SIZE:
1248 *(int *) data = raidPtr->totalSectors;
1249 return (0);
1250
1251 /* fail a disk & optionally start reconstruction */
1252 case RAIDFRAME_FAIL_DISK:
1253
1254 if (raidPtr->Layout.map->faultsTolerated == 0) {
1255 /* Can't do this on a RAID 0!! */
1256 return(EINVAL);
1257 }
1258
1259 rr = (struct rf_recon_req *) data;
1260
1261 if (rr->row < 0 || rr->row >= raidPtr->numRow
1262 || rr->col < 0 || rr->col >= raidPtr->numCol)
1263 return (EINVAL);
1264
1265 printf("raid%d: Failing the disk: row: %d col: %d\n",
1266 unit, rr->row, rr->col);
1267
1268 /* make a copy of the recon request so that we don't rely on
1269 * the user's buffer */
1270 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1271 if (rrcopy == NULL)
1272 return(ENOMEM);
1273 memcpy(rrcopy, rr, sizeof(*rr));
1274 rrcopy->raidPtr = (void *) raidPtr;
1275
1276 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1277 rf_ReconThread,
1278 rrcopy,"raid_recon");
1279 return (0);
1280
1281 /* invoke a copyback operation after recon on whatever disk
1282 * needs it, if any */
1283 case RAIDFRAME_COPYBACK:
1284
1285 if (raidPtr->Layout.map->faultsTolerated == 0) {
1286 /* This makes no sense on a RAID 0!! */
1287 return(EINVAL);
1288 }
1289
1290 if (raidPtr->copyback_in_progress == 1) {
1291 /* Copyback is already in progress! */
1292 return(EINVAL);
1293 }
1294
1295 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1296 rf_CopybackThread,
1297 raidPtr,"raid_copyback");
1298 return (retcode);
1299
1300 /* return the percentage completion of reconstruction */
1301 case RAIDFRAME_CHECK_RECON_STATUS:
1302 if (raidPtr->Layout.map->faultsTolerated == 0) {
1303 /* This makes no sense on a RAID 0, so tell the
1304 user it's done. */
1305 *(int *) data = 100;
1306 return(0);
1307 }
1308 row = 0; /* XXX we only consider a single row... */
1309 if (raidPtr->status[row] != rf_rs_reconstructing)
1310 *(int *) data = 100;
1311 else
1312 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1313 return (0);
1314 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1315 progressInfoPtr = (RF_ProgressInfo_t **) data;
1316 row = 0; /* XXX we only consider a single row... */
1317 if (raidPtr->status[row] != rf_rs_reconstructing) {
1318 progressInfo.remaining = 0;
1319 progressInfo.completed = 100;
1320 progressInfo.total = 100;
1321 } else {
1322 progressInfo.total =
1323 raidPtr->reconControl[row]->numRUsTotal;
1324 progressInfo.completed =
1325 raidPtr->reconControl[row]->numRUsComplete;
1326 progressInfo.remaining = progressInfo.total -
1327 progressInfo.completed;
1328 }
1329 retcode = copyout((caddr_t) &progressInfo,
1330 (caddr_t) *progressInfoPtr,
1331 sizeof(RF_ProgressInfo_t));
1332 return (retcode);
1333
1334 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1335 if (raidPtr->Layout.map->faultsTolerated == 0) {
1336 /* This makes no sense on a RAID 0, so tell the
1337 user it's done. */
1338 *(int *) data = 100;
1339 return(0);
1340 }
1341 if (raidPtr->parity_rewrite_in_progress == 1) {
1342 *(int *) data = 100 *
1343 raidPtr->parity_rewrite_stripes_done /
1344 raidPtr->Layout.numStripe;
1345 } else {
1346 *(int *) data = 100;
1347 }
1348 return (0);
1349
1350 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1351 progressInfoPtr = (RF_ProgressInfo_t **) data;
1352 if (raidPtr->parity_rewrite_in_progress == 1) {
1353 progressInfo.total = raidPtr->Layout.numStripe;
1354 progressInfo.completed =
1355 raidPtr->parity_rewrite_stripes_done;
1356 progressInfo.remaining = progressInfo.total -
1357 progressInfo.completed;
1358 } else {
1359 progressInfo.remaining = 0;
1360 progressInfo.completed = 100;
1361 progressInfo.total = 100;
1362 }
1363 retcode = copyout((caddr_t) &progressInfo,
1364 (caddr_t) *progressInfoPtr,
1365 sizeof(RF_ProgressInfo_t));
1366 return (retcode);
1367
1368 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1369 if (raidPtr->Layout.map->faultsTolerated == 0) {
1370 /* This makes no sense on a RAID 0 */
1371 *(int *) data = 100;
1372 return(0);
1373 }
1374 if (raidPtr->copyback_in_progress == 1) {
1375 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1376 raidPtr->Layout.numStripe;
1377 } else {
1378 *(int *) data = 100;
1379 }
1380 return (0);
1381
1382 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1383 progressInfoPtr = (RF_ProgressInfo_t **) data;
1384 if (raidPtr->copyback_in_progress == 1) {
1385 progressInfo.total = raidPtr->Layout.numStripe;
1386 progressInfo.completed =
1387 raidPtr->copyback_stripes_done;
1388 progressInfo.remaining = progressInfo.total -
1389 progressInfo.completed;
1390 } else {
1391 progressInfo.remaining = 0;
1392 progressInfo.completed = 100;
1393 progressInfo.total = 100;
1394 }
1395 retcode = copyout((caddr_t) &progressInfo,
1396 (caddr_t) *progressInfoPtr,
1397 sizeof(RF_ProgressInfo_t));
1398 return (retcode);
1399
1400 /* the sparetable daemon calls this to wait for the kernel to
1401 * need a spare table. this ioctl does not return until a
1402 * spare table is needed. XXX -- calling mpsleep here in the
1403 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1404 * -- I should either compute the spare table in the kernel,
1405 * or have a different -- XXX XXX -- interface (a different
1406 * character device) for delivering the table -- XXX */
1407 #if 0
1408 case RAIDFRAME_SPARET_WAIT:
1409 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1410 while (!rf_sparet_wait_queue)
1411 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1412 waitreq = rf_sparet_wait_queue;
1413 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1414 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1415
1416 /* structure assignment */
1417 *((RF_SparetWait_t *) data) = *waitreq;
1418
1419 RF_Free(waitreq, sizeof(*waitreq));
1420 return (0);
1421
1422 /* wakes up a process waiting on SPARET_WAIT and puts an error
1423 * code in it that will cause the daemon to exit */
1424 case RAIDFRAME_ABORT_SPARET_WAIT:
1425 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1426 waitreq->fcol = -1;
1427 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1428 waitreq->next = rf_sparet_wait_queue;
1429 rf_sparet_wait_queue = waitreq;
1430 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1431 wakeup(&rf_sparet_wait_queue);
1432 return (0);
1433
1434 /* used by the spare table daemon to deliver a spare table
1435 * into the kernel */
1436 case RAIDFRAME_SEND_SPARET:
1437
1438 /* install the spare table */
1439 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1440
1441 /* respond to the requestor. the return status of the spare
1442 * table installation is passed in the "fcol" field */
1443 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1444 waitreq->fcol = retcode;
1445 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1446 waitreq->next = rf_sparet_resp_queue;
1447 rf_sparet_resp_queue = waitreq;
1448 wakeup(&rf_sparet_resp_queue);
1449 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1450
1451 return (retcode);
1452 #endif
1453
1454 default:
1455 break; /* fall through to the os-specific code below */
1456
1457 }
1458
1459 if (!raidPtr->valid)
1460 return (EINVAL);
1461
1462 /*
1463 * Add support for "regular" device ioctls here.
1464 */
1465
1466 switch (cmd) {
1467 case DIOCGDINFO:
1468 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1469 break;
1470 #ifdef __HAVE_OLD_DISKLABEL
1471 case ODIOCGDINFO:
1472 newlabel = *(rs->sc_dkdev.dk_label);
1473 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1474 return ENOTTY;
1475 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1476 break;
1477 #endif
1478
1479 case DIOCGPART:
1480 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1481 ((struct partinfo *) data)->part =
1482 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1483 break;
1484
1485 case DIOCWDINFO:
1486 case DIOCSDINFO:
1487 #ifdef __HAVE_OLD_DISKLABEL
1488 case ODIOCWDINFO:
1489 case ODIOCSDINFO:
1490 #endif
1491 {
1492 struct disklabel *lp;
1493 #ifdef __HAVE_OLD_DISKLABEL
1494 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1495 memset(&newlabel, 0, sizeof newlabel);
1496 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1497 lp = &newlabel;
1498 } else
1499 #endif
1500 lp = (struct disklabel *)data;
1501
1502 if ((error = raidlock(rs)) != 0)
1503 return (error);
1504
1505 rs->sc_flags |= RAIDF_LABELLING;
1506
1507 error = setdisklabel(rs->sc_dkdev.dk_label,
1508 lp, 0, rs->sc_dkdev.dk_cpulabel);
1509 if (error == 0) {
1510 if (cmd == DIOCWDINFO
1511 #ifdef __HAVE_OLD_DISKLABEL
1512 || cmd == ODIOCWDINFO
1513 #endif
1514 )
1515 error = writedisklabel(RAIDLABELDEV(dev),
1516 raidstrategy, rs->sc_dkdev.dk_label,
1517 rs->sc_dkdev.dk_cpulabel);
1518 }
1519 rs->sc_flags &= ~RAIDF_LABELLING;
1520
1521 raidunlock(rs);
1522
1523 if (error)
1524 return (error);
1525 break;
1526 }
1527
1528 case DIOCWLABEL:
1529 if (*(int *) data != 0)
1530 rs->sc_flags |= RAIDF_WLABEL;
1531 else
1532 rs->sc_flags &= ~RAIDF_WLABEL;
1533 break;
1534
1535 case DIOCGDEFLABEL:
1536 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1537 break;
1538
1539 #ifdef __HAVE_OLD_DISKLABEL
1540 case ODIOCGDEFLABEL:
1541 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1542 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1543 return ENOTTY;
1544 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1545 break;
1546 #endif
1547
1548 default:
1549 retcode = ENOTTY;
1550 }
1551 return (retcode);
1552
1553 }
1554
1555
1556 /* raidinit -- complete the rest of the initialization for the
1557 RAIDframe device. */
1558
1559
1560 static void
1561 raidinit(raidPtr)
1562 RF_Raid_t *raidPtr;
1563 {
1564 struct raid_softc *rs;
1565 int unit;
1566
1567 unit = raidPtr->raidid;
1568
1569 rs = &raid_softc[unit];
1570
1571 /* XXX should check return code first... */
1572 rs->sc_flags |= RAIDF_INITED;
1573
1574 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1575
1576 rs->sc_dkdev.dk_name = rs->sc_xname;
1577
1578 /* disk_attach actually creates space for the CPU disklabel, among
1579 * other things, so it's critical to call this *BEFORE* we try putzing
1580 * with disklabels. */
1581
1582 disk_attach(&rs->sc_dkdev);
1583
1584 /* XXX There may be a weird interaction here between this, and
1585 * protectedSectors, as used in RAIDframe. */
1586
1587 rs->sc_size = raidPtr->totalSectors;
1588
1589 }
1590
1591 /* wake up the daemon & tell it to get us a spare table
1592 * XXX
1593 * the entries in the queues should be tagged with the raidPtr
1594 * so that in the extremely rare case that two recons happen at once,
1595 * we know for which device we're requesting a spare table
1596 * XXX
1597 *
1598 * XXX This code is not currently used. GO
1599 */
1600 int
1601 rf_GetSpareTableFromDaemon(req)
1602 RF_SparetWait_t *req;
1603 {
1604 int retcode;
1605
1606 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1607 req->next = rf_sparet_wait_queue;
1608 rf_sparet_wait_queue = req;
1609 wakeup(&rf_sparet_wait_queue);
1610
1611 /* mpsleep unlocks the mutex */
1612 while (!rf_sparet_resp_queue) {
1613 tsleep(&rf_sparet_resp_queue, PRIBIO,
1614 "raidframe getsparetable", 0);
1615 }
1616 req = rf_sparet_resp_queue;
1617 rf_sparet_resp_queue = req->next;
1618 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1619
1620 retcode = req->fcol;
1621 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1622 * alloc'd */
1623 return (retcode);
1624 }
1625
1626 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1627 * bp & passes it down.
1628 * any calls originating in the kernel must use non-blocking I/O
1629 * do some extra sanity checking to return "appropriate" error values for
1630 * certain conditions (to make some standard utilities work)
1631 *
1632 * Formerly known as: rf_DoAccessKernel
1633 */
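/*
 * In short: raidstrategy() above queues bufs on rs->buf_queue; here we
 * drain that queue while raidPtr->openings permits, translate the
 * partition-relative b_blkno into an absolute RAID address, and hand
 * each request to rf_DoAccess() as non-blocking I/O.
 */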
1634 void
1635 raidstart(raidPtr)
1636 RF_Raid_t *raidPtr;
1637 {
1638 RF_SectorCount_t num_blocks, pb, sum;
1639 RF_RaidAddr_t raid_addr;
1640 int retcode;
1641 struct partition *pp;
1642 daddr_t blocknum;
1643 int unit;
1644 struct raid_softc *rs;
1645 int do_async;
1646 struct buf *bp;
1647
1648 unit = raidPtr->raidid;
1649 rs = &raid_softc[unit];
1650
1651 /* quick check to see if anything has died recently */
1652 RF_LOCK_MUTEX(raidPtr->mutex);
1653 if (raidPtr->numNewFailures > 0) {
1654 rf_update_component_labels(raidPtr,
1655 RF_NORMAL_COMPONENT_UPDATE);
1656 raidPtr->numNewFailures--;
1657 }
1658
1659 /* Check to see if we're at the limit... */
1660 while (raidPtr->openings > 0) {
1661 RF_UNLOCK_MUTEX(raidPtr->mutex);
1662
1663 /* get the next item, if any, from the queue */
1664 if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
1665 /* nothing more to do */
1666 return;
1667 }
1668
1669 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1670 * partition.. Need to make it absolute to the underlying
1671 * device.. */
1672
1673 blocknum = bp->b_blkno;
1674 if (DISKPART(bp->b_dev) != RAW_PART) {
1675 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1676 blocknum += pp->p_offset;
1677 }
1678
1679 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1680 (int) blocknum));
1681
1682 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1683 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1684
1685 /* *THIS* is where we adjust what block we're going to...
1686 * but DO NOT TOUCH bp->b_blkno!!! */
1687 raid_addr = blocknum;
1688
1689 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1690 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1691 sum = raid_addr + num_blocks + pb;
1692 if (1 || rf_debugKernelAccess) {
1693 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1694 (int) raid_addr, (int) sum, (int) num_blocks,
1695 (int) pb, (int) bp->b_resid));
1696 }
1697 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1698 || (sum < num_blocks) || (sum < pb)) {
1699 bp->b_error = ENOSPC;
1700 bp->b_flags |= B_ERROR;
1701 bp->b_resid = bp->b_bcount;
1702 biodone(bp);
1703 RF_LOCK_MUTEX(raidPtr->mutex);
1704 continue;
1705 }
1706 /*
1707 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1708 */
1709
1710 if (bp->b_bcount & raidPtr->sectorMask) {
1711 bp->b_error = EINVAL;
1712 bp->b_flags |= B_ERROR;
1713 bp->b_resid = bp->b_bcount;
1714 biodone(bp);
1715 RF_LOCK_MUTEX(raidPtr->mutex);
1716 continue;
1717
1718 }
1719 db1_printf(("Calling DoAccess..\n"));
1720
1721
1722 RF_LOCK_MUTEX(raidPtr->mutex);
1723 raidPtr->openings--;
1724 RF_UNLOCK_MUTEX(raidPtr->mutex);
1725
1726 /*
1727 * Everything is async.
1728 */
1729 do_async = 1;
1730
1731 disk_busy(&rs->sc_dkdev);
1732
1733 /* XXX we're still at splbio() here... do we *really*
1734 need to be? */
1735
1736 /* don't ever condition on bp->b_flags & B_WRITE.
1737 * always condition on B_READ instead */
1738
1739 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1740 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1741 do_async, raid_addr, num_blocks,
1742 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1743
1744 RF_LOCK_MUTEX(raidPtr->mutex);
1745 }
1746 RF_UNLOCK_MUTEX(raidPtr->mutex);
1747 }
1748
1749
1750
1751
1752 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1753
1754 int
1755 rf_DispatchKernelIO(queue, req)
1756 RF_DiskQueue_t *queue;
1757 RF_DiskQueueData_t *req;
1758 {
1759 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1760 struct buf *bp;
1761 struct raidbuf *raidbp = NULL;
1762 struct raid_softc *rs;
1763 int unit;
1764 int s;
1765
1766 s=0;
1767 /* s = splbio();*/ /* want to test this */
1768 /* XXX along with the vnode, we also need the softc associated with
1769 * this device.. */
1770
1771 req->queue = queue;
1772
1773 unit = queue->raidPtr->raidid;
1774
1775 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1776
1777 if (unit >= numraid) {
1778 printf("Invalid unit number: %d %d\n", unit, numraid);
1779 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1780 }
1781 rs = &raid_softc[unit];
1782
1783 bp = req->bp;
1784 #if 1
1785 /* XXX when there is a physical disk failure, someone is passing us a
1786 * buffer that contains old stuff!! Attempt to deal with this problem
1787 * without taking a performance hit... (not sure where the real bug
1788 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1789
1790 if (bp->b_flags & B_ERROR) {
1791 bp->b_flags &= ~B_ERROR;
1792 }
1793 if (bp->b_error != 0) {
1794 bp->b_error = 0;
1795 }
1796 #endif
1797 raidbp = RAIDGETBUF(rs);
1798
1799 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1800
1801 /*
1802 * context for raidiodone
1803 */
1804 raidbp->rf_obp = bp;
1805 raidbp->req = req;
1806
1807 LIST_INIT(&raidbp->rf_buf.b_dep);
1808
1809 switch (req->type) {
1810 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1811 /* XXX need to do something extra here.. */
1812 /* I'm leaving this in, as I've never actually seen it used,
1813 * and I'd like folks to report it... GO */
1814 printf(("WAKEUP CALLED\n"));
1815 queue->numOutstanding++;
1816
1817 /* XXX need to glue the original buffer into this?? */
1818
1819 KernelWakeupFunc(&raidbp->rf_buf);
1820 break;
1821
1822 case RF_IO_TYPE_READ:
1823 case RF_IO_TYPE_WRITE:
1824
1825 if (req->tracerec) {
1826 RF_ETIMER_START(req->tracerec->timer);
1827 }
1828 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1829 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1830 req->sectorOffset, req->numSector,
1831 req->buf, KernelWakeupFunc, (void *) req,
1832 queue->raidPtr->logBytesPerSector, req->b_proc);
1833
1834 if (rf_debugKernelAccess) {
1835 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1836 (long) bp->b_blkno));
1837 }
1838 queue->numOutstanding++;
1839 queue->last_deq_sector = req->sectorOffset;
1840 /* acc wouldn't have been let in if there were any pending
1841 * reqs at any other priority */
1842 queue->curPriority = req->priority;
1843
1844 db1_printf(("Going for %c to unit %d row %d col %d\n",
1845 req->type, unit, queue->row, queue->col));
1846 db1_printf(("sector %d count %d (%d bytes) %d\n",
1847 (int) req->sectorOffset, (int) req->numSector,
1848 (int) (req->numSector <<
1849 queue->raidPtr->logBytesPerSector),
1850 (int) queue->raidPtr->logBytesPerSector));
1851 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1852 raidbp->rf_buf.b_vp->v_numoutput++;
1853 }
1854 VOP_STRATEGY(&raidbp->rf_buf);
1855
1856 break;
1857
1858 default:
1859 panic("bad req->type in rf_DispatchKernelIO");
1860 }
1861 db1_printf(("Exiting from DispatchKernelIO\n"));
1862 /* splx(s); */ /* want to test this */
1863 return (0);
1864 }
1865 /* this is the callback function associated with an I/O invoked from
1866 kernel code.
1867 */
1868 static void
1869 KernelWakeupFunc(vbp)
1870 struct buf *vbp;
1871 {
1872 RF_DiskQueueData_t *req = NULL;
1873 RF_DiskQueue_t *queue;
1874 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1875 struct buf *bp;
1876 struct raid_softc *rs;
1877 int unit;
1878 int s;
1879
1880 s = splbio();
1881 db1_printf(("recovering the request queue:\n"));
1882 req = raidbp->req;
1883
1884 bp = raidbp->rf_obp;
1885
1886 queue = (RF_DiskQueue_t *) req->queue;
1887
1888 if (raidbp->rf_buf.b_flags & B_ERROR) {
1889 bp->b_flags |= B_ERROR;
1890 bp->b_error = raidbp->rf_buf.b_error ?
1891 raidbp->rf_buf.b_error : EIO;
1892 }
1893
1894 /* XXX methinks this could be wrong... */
1895 #if 1
1896 bp->b_resid = raidbp->rf_buf.b_resid;
1897 #endif
1898
1899 if (req->tracerec) {
1900 RF_ETIMER_STOP(req->tracerec->timer);
1901 RF_ETIMER_EVAL(req->tracerec->timer);
1902 RF_LOCK_MUTEX(rf_tracing_mutex);
1903 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1904 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1905 req->tracerec->num_phys_ios++;
1906 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1907 }
1908 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1909
1910 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1911
1912
1913 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1914 * ballistic, and mark the component as hosed... */
1915
1916 if (bp->b_flags & B_ERROR) {
1917 /* Mark the disk as dead */
1918 /* but only mark it once... */
1919 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1920 rf_ds_optimal) {
1921 printf("raid%d: IO Error. Marking %s as failed.\n",
1922 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1923 queue->raidPtr->Disks[queue->row][queue->col].status =
1924 rf_ds_failed;
1925 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1926 queue->raidPtr->numFailures++;
1927 queue->raidPtr->numNewFailures++;
1928 } else { /* Disk is already dead... */
1929 /* printf("Disk already marked as dead!\n"); */
1930 }
1931
1932 }
1933
1934 rs = &raid_softc[unit];
1935 RAIDPUTBUF(rs, raidbp);
1936
1937 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1938 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1939
1940 splx(s);
1941 }
1942
1943
1944
1945 /*
1946 * initialize a buf structure for doing an I/O in the kernel.
1947 */
1948 static void
1949 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1950 logBytesPerSector, b_proc)
1951 struct buf *bp;
1952 struct vnode *b_vp;
1953 unsigned rw_flag;
1954 dev_t dev;
1955 RF_SectorNum_t startSect;
1956 RF_SectorCount_t numSect;
1957 caddr_t buf;
1958 void (*cbFunc) (struct buf *);
1959 void *cbArg;
1960 int logBytesPerSector;
1961 struct proc *b_proc;
1962 {
1963 /* bp->b_flags = B_PHYS | rw_flag; */
1964 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1965 bp->b_bcount = numSect << logBytesPerSector;
1966 bp->b_bufsize = bp->b_bcount;
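	/*
	 * b_bcount is in bytes: numSect << logBytesPerSector.  For example,
	 * with 512-byte sectors (logBytesPerSector == 9), 64 sectors gives
	 * 64 << 9 == 32768 bytes.
	 */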
1967 bp->b_error = 0;
1968 bp->b_dev = dev;
1969 bp->b_data = buf;
1970 bp->b_blkno = startSect;
1971 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1972 if (bp->b_bcount == 0) {
1973 panic("bp->b_bcount is zero in InitBP!!\n");
1974 }
1975 bp->b_proc = b_proc;
1976 bp->b_iodone = cbFunc;
1977 bp->b_vp = b_vp;
1978
1979 }
1980
1981 static void
1982 raidgetdefaultlabel(raidPtr, rs, lp)
1983 RF_Raid_t *raidPtr;
1984 struct raid_softc *rs;
1985 struct disklabel *lp;
1986 {
1987 db1_printf(("Building a default label...\n"));
1988 memset(lp, 0, sizeof(*lp));
1989
1990 /* fabricate a label... */
1991 lp->d_secperunit = raidPtr->totalSectors;
1992 lp->d_secsize = raidPtr->bytesPerSector;
1993 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1994 lp->d_ntracks = 4 * raidPtr->numCol;
1995 lp->d_ncylinders = raidPtr->totalSectors /
1996 (lp->d_nsectors * lp->d_ntracks);
1997 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
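	/*
	 * The CHS geometry fabricated above is essentially cosmetic:
	 * "sectors per track" is just the data stripe width, and "4 tracks
	 * per cylinder per column" is arbitrary.  Presumably only
	 * d_secperunit and d_secsize need to reflect reality.
	 */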
1998
1999 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2000 lp->d_type = DTYPE_RAID;
2001 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2002 lp->d_rpm = 3600;
2003 lp->d_interleave = 1;
2004 lp->d_flags = 0;
2005
2006 lp->d_partitions[RAW_PART].p_offset = 0;
2007 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2008 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2009 lp->d_npartitions = RAW_PART + 1;
2010
2011 lp->d_magic = DISKMAGIC;
2012 lp->d_magic2 = DISKMAGIC;
2013 	lp->d_checksum = dkcksum(lp);	/* checksum the label being built */
2014
2015 }
2016 /*
2017 * Read the disklabel from the raid device. If one is not present, fake one
2018 * up.
2019 */
2020 static void
2021 raidgetdisklabel(dev)
2022 dev_t dev;
2023 {
2024 int unit = raidunit(dev);
2025 struct raid_softc *rs = &raid_softc[unit];
2026 char *errstring;
2027 struct disklabel *lp = rs->sc_dkdev.dk_label;
2028 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2029 RF_Raid_t *raidPtr;
2030
2031 db1_printf(("Getting the disklabel...\n"));
2032
2033 memset(clp, 0, sizeof(*clp));
2034
2035 raidPtr = raidPtrs[unit];
2036
2037 raidgetdefaultlabel(raidPtr, rs, lp);
2038
2039 /*
2040 * Call the generic disklabel extraction routine.
2041 */
2042 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2043 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2044 if (errstring)
2045 raidmakedisklabel(rs);
2046 else {
2047 int i;
2048 struct partition *pp;
2049
2050 /*
2051 * Sanity check whether the found disklabel is valid.
2052 *
2053 		 * This is necessary since the total size of the raid device
2054 		 * may vary when the interleave is changed, even though
2055 		 * exactly the same components are used, and an old disklabel
2056 		 * may be used if one is found.
2057 */
2058 if (lp->d_secperunit != rs->sc_size)
2059 printf("raid%d: WARNING: %s: "
2060 "total sector size in disklabel (%d) != "
2061 "the size of raid (%ld)\n", unit, rs->sc_xname,
2062 lp->d_secperunit, (long) rs->sc_size);
2063 for (i = 0; i < lp->d_npartitions; i++) {
2064 pp = &lp->d_partitions[i];
2065 if (pp->p_offset + pp->p_size > rs->sc_size)
2066 printf("raid%d: WARNING: %s: end of partition `%c' "
2067 "exceeds the size of raid (%ld)\n",
2068 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2069 }
2070 }
2071
2072 }
2073 /*
2074 * Take care of things one might want to take care of in the event
2075 * that a disklabel isn't present.
2076 */
2077 static void
2078 raidmakedisklabel(rs)
2079 struct raid_softc *rs;
2080 {
2081 struct disklabel *lp = rs->sc_dkdev.dk_label;
2082 db1_printf(("Making a label..\n"));
2083
2084 /*
2085 * For historical reasons, if there's no disklabel present
2086 * the raw partition must be marked FS_BSDFFS.
2087 */
2088
2089 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2090
2091 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2092
2093 lp->d_checksum = dkcksum(lp);
2094 }
2095 /*
2096 * Lookup the provided name in the filesystem. If the file exists,
2097 * is a valid block device, and isn't being used by anyone else,
2098 * set *vpp to the file's vnode.
2099 * You'll find the original of this in ccd.c
2100 */
2101 int
2102 raidlookup(path, p, vpp)
2103 char *path;
2104 struct proc *p;
2105 struct vnode **vpp; /* result */
2106 {
2107 struct nameidata nd;
2108 struct vnode *vp;
2109 struct vattr va;
2110 int error;
2111
2112 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2113 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2114 #if 0
2115 printf("RAIDframe: vn_open returned %d\n", error);
2116 #endif
2117 return (error);
2118 }
2119 vp = nd.ni_vp;
2120 if (vp->v_usecount > 1) {
2121 VOP_UNLOCK(vp, 0);
2122 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2123 return (EBUSY);
2124 }
2125 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2126 VOP_UNLOCK(vp, 0);
2127 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2128 return (error);
2129 }
2130 /* XXX: eventually we should handle VREG, too. */
2131 if (va.va_type != VBLK) {
2132 VOP_UNLOCK(vp, 0);
2133 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2134 return (ENOTBLK);
2135 }
2136 VOP_UNLOCK(vp, 0);
2137 *vpp = vp;
2138 return (0);
2139 }
2140 /*
2141 * Wait interruptibly for an exclusive lock.
2142 *
2143 * XXX
2144 * Several drivers do this; it should be abstracted and made MP-safe.
2145 * (Hmm... where have we seen this warning before :-> GO )
2146 */
2147 static int
2148 raidlock(rs)
2149 struct raid_softc *rs;
2150 {
2151 int error;
2152
2153 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2154 rs->sc_flags |= RAIDF_WANTED;
2155 if ((error =
2156 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2157 return (error);
2158 }
2159 rs->sc_flags |= RAIDF_LOCKED;
2160 return (0);
2161 }
2162 /*
2163 * Unlock and wake up any waiters.
2164 */
2165 static void
2166 raidunlock(rs)
2167 struct raid_softc *rs;
2168 {
2169
2170 rs->sc_flags &= ~RAIDF_LOCKED;
2171 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2172 rs->sc_flags &= ~RAIDF_WANTED;
2173 wakeup(rs);
2174 }
2175 }
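/*
 * A minimal usage sketch (not from this file): callers are expected to
 * bracket softc manipulation with these, e.g.
 *
 *	if ((error = raidlock(rs)) != 0)
 *		return (error);
 *	... fiddle with rs ...
 *	raidunlock(rs);
 */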
2176
2177
2178 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2179 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
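/*
 * With the usual DEV_BSIZE of 512, the component label lives at block
 * 16384 / 512 == 32 of the component and occupies 1024 bytes (two such
 * blocks); raidread_component_label()/raidwrite_component_label() below
 * compute b_blkno as RF_COMPONENT_INFO_OFFSET / DEV_BSIZE and transfer
 * RF_COMPONENT_INFO_SIZE bytes.
 */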
2180
2181 int
2182 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2183 {
2184 RF_ComponentLabel_t clabel;
2185 raidread_component_label(dev, b_vp, &clabel);
2186 clabel.mod_counter = mod_counter;
2187 clabel.clean = RF_RAID_CLEAN;
2188 raidwrite_component_label(dev, b_vp, &clabel);
2189 return(0);
2190 }
2191
2192
2193 int
2194 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2195 {
2196 RF_ComponentLabel_t clabel;
2197 raidread_component_label(dev, b_vp, &clabel);
2198 clabel.mod_counter = mod_counter;
2199 clabel.clean = RF_RAID_DIRTY;
2200 raidwrite_component_label(dev, b_vp, &clabel);
2201 return(0);
2202 }
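/*
 * Both raidmarkclean() and raidmarkdirty() are read-modify-write updates of
 * the on-disk component label: they re-read the label, change only
 * mod_counter and the clean flag, and write it back.
 */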
2203
2204 /* ARGSUSED */
2205 int
2206 raidread_component_label(dev, b_vp, clabel)
2207 dev_t dev;
2208 struct vnode *b_vp;
2209 RF_ComponentLabel_t *clabel;
2210 {
2211 struct buf *bp;
2212 int error;
2213
2214 /* XXX should probably ensure that we don't try to do this if
2215 someone has changed rf_protected_sectors. */
2216
2217 if (b_vp == NULL) {
2218 /* For whatever reason, this component is not valid.
2219 Don't try to read a component label from it. */
2220 return(EINVAL);
2221 }
2222
2223 /* get a block of the appropriate size... */
2224 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2225 bp->b_dev = dev;
2226
2227 /* get our ducks in a row for the read */
2228 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2229 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2230 bp->b_flags |= B_READ;
2231 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2232
2233 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2234
2235 error = biowait(bp);
2236
2237 if (!error) {
2238 memcpy(clabel, bp->b_data,
2239 sizeof(RF_ComponentLabel_t));
2240 #if 0
2241 rf_print_component_label( clabel );
2242 #endif
2243 } else {
2244 #if 0
2245 printf("Failed to read RAID component label!\n");
2246 #endif
2247 }
2248
2249 brelse(bp);
2250 return(error);
2251 }
2252 /* ARGSUSED */
2253 int
2254 raidwrite_component_label(dev, b_vp, clabel)
2255 dev_t dev;
2256 struct vnode *b_vp;
2257 RF_ComponentLabel_t *clabel;
2258 {
2259 struct buf *bp;
2260 int error;
2261
2262 /* get a block of the appropriate size... */
2263 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2264 bp->b_dev = dev;
2265
2266 /* get our ducks in a row for the write */
2267 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2268 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2269 bp->b_flags |= B_WRITE;
2270 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2271
2272 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2273
2274 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2275
2276 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2277 error = biowait(bp);
2278 brelse(bp);
2279 if (error) {
2280 #if 1
2281 printf("Failed to write RAID component info!\n");
2282 #endif
2283 }
2284
2285 return(error);
2286 }
2287
2288 void
2289 rf_markalldirty(raidPtr)
2290 RF_Raid_t *raidPtr;
2291 {
2292 RF_ComponentLabel_t clabel;
2293 int r,c;
2294
2295 raidPtr->mod_counter++;
2296 for (r = 0; r < raidPtr->numRow; r++) {
2297 for (c = 0; c < raidPtr->numCol; c++) {
2298 /* we don't want to touch (at all) a disk that has
2299 failed */
2300 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2301 raidread_component_label(
2302 raidPtr->Disks[r][c].dev,
2303 raidPtr->raid_cinfo[r][c].ci_vp,
2304 &clabel);
2305 if (clabel.status == rf_ds_spared) {
2306 /* XXX do something special...
2307 but whatever you do, don't
2308 try to access it!! */
2309 } else {
2310 #if 0
2311 clabel.status =
2312 raidPtr->Disks[r][c].status;
2313 raidwrite_component_label(
2314 raidPtr->Disks[r][c].dev,
2315 raidPtr->raid_cinfo[r][c].ci_vp,
2316 &clabel);
2317 #endif
2318 raidmarkdirty(
2319 raidPtr->Disks[r][c].dev,
2320 raidPtr->raid_cinfo[r][c].ci_vp,
2321 raidPtr->mod_counter);
2322 }
2323 }
2324 }
2325 }
2326 /* printf("Component labels marked dirty.\n"); */
2327 #if 0
2328 for( c = 0; c < raidPtr->numSpare ; c++) {
2329 sparecol = raidPtr->numCol + c;
2330 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2331 /*
2332
2333 XXX this is where we get fancy and map this spare
2334 		   into its correct spot in the array.
2335
2336 */
2337 /*
2338
2339 we claim this disk is "optimal" if it's
2340 rf_ds_used_spare, as that means it should be
2341 directly substitutable for the disk it replaced.
2342 We note that too...
2343
2344 */
2345
2346 for(i=0;i<raidPtr->numRow;i++) {
2347 for(j=0;j<raidPtr->numCol;j++) {
2348 if ((raidPtr->Disks[i][j].spareRow ==
2349 r) &&
2350 (raidPtr->Disks[i][j].spareCol ==
2351 sparecol)) {
2352 srow = r;
2353 scol = sparecol;
2354 break;
2355 }
2356 }
2357 }
2358
2359 raidread_component_label(
2360 raidPtr->Disks[r][sparecol].dev,
2361 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2362 &clabel);
2363 /* make sure status is noted */
2364 clabel.version = RF_COMPONENT_LABEL_VERSION;
2365 clabel.mod_counter = raidPtr->mod_counter;
2366 clabel.serial_number = raidPtr->serial_number;
2367 clabel.row = srow;
2368 clabel.column = scol;
2369 clabel.num_rows = raidPtr->numRow;
2370 clabel.num_columns = raidPtr->numCol;
2371 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2372 clabel.status = rf_ds_optimal;
2373 raidwrite_component_label(
2374 raidPtr->Disks[r][sparecol].dev,
2375 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2376 &clabel);
2377 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2378 			    raidPtr->raid_cinfo[r][sparecol].ci_vp, raidPtr->mod_counter);
2379 }
2380 }
2381
2382 #endif
2383 }
2384
2385
2386 void
2387 rf_update_component_labels(raidPtr, final)
2388 RF_Raid_t *raidPtr;
2389 int final;
2390 {
2391 RF_ComponentLabel_t clabel;
2392 int sparecol;
2393 int r,c;
2394 int i,j;
2395 int srow, scol;
2396
2397 srow = -1;
2398 scol = -1;
2399
2400 /* XXX should do extra checks to make sure things really are clean,
2401 rather than blindly setting the clean bit... */
2402
2403 raidPtr->mod_counter++;
2404
2405 for (r = 0; r < raidPtr->numRow; r++) {
2406 for (c = 0; c < raidPtr->numCol; c++) {
2407 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2408 raidread_component_label(
2409 raidPtr->Disks[r][c].dev,
2410 raidPtr->raid_cinfo[r][c].ci_vp,
2411 &clabel);
2412 /* make sure status is noted */
2413 clabel.status = rf_ds_optimal;
2414 /* bump the counter */
2415 clabel.mod_counter = raidPtr->mod_counter;
2416
2417 raidwrite_component_label(
2418 raidPtr->Disks[r][c].dev,
2419 raidPtr->raid_cinfo[r][c].ci_vp,
2420 &clabel);
2421 if (final == RF_FINAL_COMPONENT_UPDATE) {
2422 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2423 raidmarkclean(
2424 raidPtr->Disks[r][c].dev,
2425 raidPtr->raid_cinfo[r][c].ci_vp,
2426 raidPtr->mod_counter);
2427 }
2428 }
2429 }
2430 /* else we don't touch it.. */
2431 }
2432 }
2433
2434 for( c = 0; c < raidPtr->numSpare ; c++) {
2435 sparecol = raidPtr->numCol + c;
2436 /* Need to ensure that the reconstruct actually completed! */
2437 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2438 /*
2439
2440 we claim this disk is "optimal" if it's
2441 rf_ds_used_spare, as that means it should be
2442 directly substitutable for the disk it replaced.
2443 We note that too...
2444
2445 */
2446
2447 for(i=0;i<raidPtr->numRow;i++) {
2448 for(j=0;j<raidPtr->numCol;j++) {
2449 if ((raidPtr->Disks[i][j].spareRow ==
2450 0) &&
2451 (raidPtr->Disks[i][j].spareCol ==
2452 sparecol)) {
2453 srow = i;
2454 scol = j;
2455 break;
2456 }
2457 }
2458 }
2459
2460 /* XXX shouldn't *really* need this... */
2461 raidread_component_label(
2462 raidPtr->Disks[0][sparecol].dev,
2463 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2464 &clabel);
2465 /* make sure status is noted */
2466
2467 raid_init_component_label(raidPtr, &clabel);
2468
2469 clabel.mod_counter = raidPtr->mod_counter;
2470 clabel.row = srow;
2471 clabel.column = scol;
2472 clabel.status = rf_ds_optimal;
2473
2474 raidwrite_component_label(
2475 raidPtr->Disks[0][sparecol].dev,
2476 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2477 &clabel);
2478 if (final == RF_FINAL_COMPONENT_UPDATE) {
2479 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2480 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2481 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2482 raidPtr->mod_counter);
2483 }
2484 }
2485 }
2486 }
2487 /* printf("Component labels updated\n"); */
2488 }
2489
2490 void
2491 rf_close_component(raidPtr, vp, auto_configured)
2492 RF_Raid_t *raidPtr;
2493 struct vnode *vp;
2494 int auto_configured;
2495 {
2496 struct proc *p;
2497
2498 p = raidPtr->engine_thread;
2499
2500 if (vp != NULL) {
2501 if (auto_configured == 1) {
2502 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2503 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2504 vput(vp);
2505
2506 } else {
2507 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2508 }
2509 } else {
2510 #if 0
2511 printf("vnode was NULL\n");
2512 #endif
2513 }
2514 }
2515
2516
2517 void
2518 rf_UnconfigureVnodes(raidPtr)
2519 RF_Raid_t *raidPtr;
2520 {
2521 int r,c;
2522 struct proc *p;
2523 struct vnode *vp;
2524 int acd;
2525
2526
2527 /* We take this opportunity to close the vnodes like we should.. */
2528
2529 p = raidPtr->engine_thread;
2530
2531 for (r = 0; r < raidPtr->numRow; r++) {
2532 for (c = 0; c < raidPtr->numCol; c++) {
2533 #if 0
2534 printf("raid%d: Closing vnode for row: %d col: %d\n",
2535 raidPtr->raidid, r, c);
2536 #endif
2537 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2538 acd = raidPtr->Disks[r][c].auto_configured;
2539 rf_close_component(raidPtr, vp, acd);
2540 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2541 raidPtr->Disks[r][c].auto_configured = 0;
2542 }
2543 }
2544 for (r = 0; r < raidPtr->numSpare; r++) {
2545 #if 0
2546 printf("raid%d: Closing vnode for spare: %d\n",
2547 raidPtr->raidid, r);
2548 #endif
2549 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2550 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2551 rf_close_component(raidPtr, vp, acd);
2552 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2553 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2554 }
2555 }
2556
2557
2558 void
2559 rf_ReconThread(req)
2560 struct rf_recon_req *req;
2561 {
2562 int s;
2563 RF_Raid_t *raidPtr;
2564
2565 s = splbio();
2566 raidPtr = (RF_Raid_t *) req->raidPtr;
2567 raidPtr->recon_in_progress = 1;
2568
2569 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2570 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2571
2572 /* XXX get rid of this! we don't need it at all.. */
2573 RF_Free(req, sizeof(*req));
2574
2575 raidPtr->recon_in_progress = 0;
2576 splx(s);
2577
2578 /* That's all... */
2579 kthread_exit(0); /* does not return */
2580 }
2581
2582 void
2583 rf_RewriteParityThread(raidPtr)
2584 RF_Raid_t *raidPtr;
2585 {
2586 int retcode;
2587 int s;
2588
2589 raidPtr->parity_rewrite_in_progress = 1;
2590 s = splbio();
2591 retcode = rf_RewriteParity(raidPtr);
2592 splx(s);
2593 if (retcode) {
2594 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2595 } else {
2596 		/* set the clean bit!  If we shut down correctly,
2597 		   the clean bit on each component label will get
2598 		   set */
2599 raidPtr->parity_good = RF_RAID_CLEAN;
2600 }
2601 raidPtr->parity_rewrite_in_progress = 0;
2602
2603 /* Anyone waiting for us to stop? If so, inform them... */
2604 if (raidPtr->waitShutdown) {
2605 wakeup(&raidPtr->parity_rewrite_in_progress);
2606 }
2607
2608 /* That's all... */
2609 kthread_exit(0); /* does not return */
2610 }
2611
2612
2613 void
2614 rf_CopybackThread(raidPtr)
2615 RF_Raid_t *raidPtr;
2616 {
2617 int s;
2618
2619 raidPtr->copyback_in_progress = 1;
2620 s = splbio();
2621 rf_CopybackReconstructedData(raidPtr);
2622 splx(s);
2623 raidPtr->copyback_in_progress = 0;
2624
2625 /* That's all... */
2626 kthread_exit(0); /* does not return */
2627 }
2628
2629
2630 void
2631 rf_ReconstructInPlaceThread(req)
2632 struct rf_recon_req *req;
2633 {
2634 int retcode;
2635 int s;
2636 RF_Raid_t *raidPtr;
2637
2638 s = splbio();
2639 raidPtr = req->raidPtr;
2640 raidPtr->recon_in_progress = 1;
2641 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2642 RF_Free(req, sizeof(*req));
2643 raidPtr->recon_in_progress = 0;
2644 splx(s);
2645
2646 /* That's all... */
2647 kthread_exit(0); /* does not return */
2648 }
2649
2650 void
2651 rf_mountroot_hook(dev)
2652 struct device *dev;
2653 {
2654
2655 }
2656
2657
2658 RF_AutoConfig_t *
2659 rf_find_raid_components()
2660 {
2661 struct devnametobdevmaj *dtobdm;
2662 struct vnode *vp;
2663 struct disklabel label;
2664 struct device *dv;
2665 char *cd_name;
2666 dev_t dev;
2667 int error;
2668 int i;
2669 int good_one;
2670 RF_ComponentLabel_t *clabel;
2671 RF_AutoConfig_t *ac_list;
2672 RF_AutoConfig_t *ac;
2673
2674
2675 /* initialize the AutoConfig list */
2676 ac_list = NULL;
2677
2678 /* we begin by trolling through *all* the devices on the system */
2679
2680 for (dv = alldevs.tqh_first; dv != NULL;
2681 dv = dv->dv_list.tqe_next) {
2682
2683 /* we are only interested in disks... */
2684 if (dv->dv_class != DV_DISK)
2685 continue;
2686
2687 /* we don't care about floppies... */
2688 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2689 continue;
2690 }
2691
2692 /* we don't care about CD's... */
2693 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"cd")) {
2694 continue;
2695 }
2696
2697 /* hdfd is the Atari/Hades floppy driver */
2698 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
2699 continue;
2700 }
2701 /* fdisa is the Atari/Milan floppy driver */
2702 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fdisa")) {
2703 continue;
2704 }
2705
2706 /* need to find the device_name_to_block_device_major stuff */
2707 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2708 dtobdm = dev_name2blk;
2709 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2710 dtobdm++;
2711 }
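		/*
		 * Note/assumption: if cd_name is not found, the loop stops on
		 * the table's NULL-name terminator and dtobdm->d_maj below is
		 * meaningless; presumably every disk driver of interest
		 * appears in dev_name2blk.
		 */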
2712
2713 /* get a vnode for the raw partition of this disk */
2714
2715 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2716 if (bdevvp(dev, &vp))
2717 panic("RAID can't alloc vnode");
2718
2719 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2720
2721 if (error) {
2722 /* "Who cares." Continue looking
2723 for something that exists*/
2724 vput(vp);
2725 continue;
2726 }
2727
2728 /* Ok, the disk exists. Go get the disklabel. */
2729 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2730 FREAD, NOCRED, 0);
2731 if (error) {
2732 /*
2733 * XXX can't happen - open() would
2734 * have errored out (or faked up one)
2735 */
2736 printf("can't get label for dev %s%c (%d)!?!?\n",
2737 dv->dv_xname, 'a' + RAW_PART, error);
2738 }
2739
2740 /* don't need this any more. We'll allocate it again
2741 a little later if we really do... */
2742 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2743 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2744 vput(vp);
2745
2746 for (i=0; i < label.d_npartitions; i++) {
2747 /* We only support partitions marked as RAID */
2748 if (label.d_partitions[i].p_fstype != FS_RAID)
2749 continue;
2750
2751 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2752 if (bdevvp(dev, &vp))
2753 panic("RAID can't alloc vnode");
2754
2755 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2756 if (error) {
2757 /* Whatever... */
2758 vput(vp);
2759 continue;
2760 }
2761
2762 good_one = 0;
2763
2764 clabel = (RF_ComponentLabel_t *)
2765 malloc(sizeof(RF_ComponentLabel_t),
2766 M_RAIDFRAME, M_NOWAIT);
2767 if (clabel == NULL) {
2768 /* XXX CLEANUP HERE */
2769 printf("RAID auto config: out of memory!\n");
2770 return(NULL); /* XXX probably should panic? */
2771 }
2772
2773 if (!raidread_component_label(dev, vp, clabel)) {
2774 /* Got the label. Does it look reasonable? */
2775 if (rf_reasonable_label(clabel) &&
2776 (clabel->partitionSize <=
2777 label.d_partitions[i].p_size)) {
2778 #if DEBUG
2779 printf("Component on: %s%c: %d\n",
2780 dv->dv_xname, 'a'+i,
2781 label.d_partitions[i].p_size);
2782 rf_print_component_label(clabel);
2783 #endif
2784 /* if it's reasonable, add it,
2785 else ignore it. */
2786 ac = (RF_AutoConfig_t *)
2787 malloc(sizeof(RF_AutoConfig_t),
2788 M_RAIDFRAME,
2789 M_NOWAIT);
2790 if (ac == NULL) {
2791 /* XXX should panic?? */
2792 return(NULL);
2793 }
2794
2795 sprintf(ac->devname, "%s%c",
2796 dv->dv_xname, 'a'+i);
2797 ac->dev = dev;
2798 ac->vp = vp;
2799 ac->clabel = clabel;
2800 ac->next = ac_list;
2801 ac_list = ac;
2802 good_one = 1;
2803 }
2804 }
2805 if (!good_one) {
2806 /* cleanup */
2807 free(clabel, M_RAIDFRAME);
2808 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2809 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2810 vput(vp);
2811 }
2812 }
2813 }
2814 return(ac_list);
2815 }
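/*
 * Rough autoconfiguration flow (as driven by the caller elsewhere in this
 * file): rf_find_raid_components() gathers candidate components, then
 * rf_create_auto_sets() groups them into config sets, then
 * rf_have_enough_components() decides whether a set is viable, and finally
 * rf_auto_config_set() builds an RF_Config_t via rf_create_configuration()
 * and configures the array.
 */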
2816
2817 static int
2818 rf_reasonable_label(clabel)
2819 RF_ComponentLabel_t *clabel;
2820 {
2821
2822 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2823 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2824 ((clabel->clean == RF_RAID_CLEAN) ||
2825 (clabel->clean == RF_RAID_DIRTY)) &&
2826 clabel->row >=0 &&
2827 clabel->column >= 0 &&
2828 clabel->num_rows > 0 &&
2829 clabel->num_columns > 0 &&
2830 clabel->row < clabel->num_rows &&
2831 clabel->column < clabel->num_columns &&
2832 clabel->blockSize > 0 &&
2833 clabel->numBlocks > 0) {
2834 /* label looks reasonable enough... */
2835 return(1);
2836 }
2837 return(0);
2838 }
2839
2840
2841 void
2842 rf_print_component_label(clabel)
2843 RF_ComponentLabel_t *clabel;
2844 {
2845 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2846 clabel->row, clabel->column,
2847 clabel->num_rows, clabel->num_columns);
2848 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2849 clabel->version, clabel->serial_number,
2850 clabel->mod_counter);
2851 printf(" Clean: %s Status: %d\n",
2852 clabel->clean ? "Yes" : "No", clabel->status );
2853 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2854 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2855 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2856 (char) clabel->parityConfig, clabel->blockSize,
2857 clabel->numBlocks);
2858 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2859 printf(" Contains root partition: %s\n",
2860 clabel->root_partition ? "Yes" : "No" );
2861 printf(" Last configured as: raid%d\n", clabel->last_unit );
2862 #if 0
2863 printf(" Config order: %d\n", clabel->config_order);
2864 #endif
2865
2866 }
2867
2868 RF_ConfigSet_t *
2869 rf_create_auto_sets(ac_list)
2870 RF_AutoConfig_t *ac_list;
2871 {
2872 RF_AutoConfig_t *ac;
2873 RF_ConfigSet_t *config_sets;
2874 RF_ConfigSet_t *cset;
2875 RF_AutoConfig_t *ac_next;
2876
2877
2878 config_sets = NULL;
2879
2880 /* Go through the AutoConfig list, and figure out which components
2881 belong to what sets. */
2882 ac = ac_list;
2883 while(ac!=NULL) {
2884 /* we're going to putz with ac->next, so save it here
2885 for use at the end of the loop */
2886 ac_next = ac->next;
2887
2888 if (config_sets == NULL) {
2889 /* will need at least this one... */
2890 config_sets = (RF_ConfigSet_t *)
2891 malloc(sizeof(RF_ConfigSet_t),
2892 M_RAIDFRAME, M_NOWAIT);
2893 if (config_sets == NULL) {
2894 panic("rf_create_auto_sets: No memory!\n");
2895 }
2896 /* this one is easy :) */
2897 config_sets->ac = ac;
2898 config_sets->next = NULL;
2899 config_sets->rootable = 0;
2900 ac->next = NULL;
2901 } else {
2902 /* which set does this component fit into? */
2903 cset = config_sets;
2904 while(cset!=NULL) {
2905 if (rf_does_it_fit(cset, ac)) {
2906 /* looks like it matches... */
2907 ac->next = cset->ac;
2908 cset->ac = ac;
2909 break;
2910 }
2911 cset = cset->next;
2912 }
2913 if (cset==NULL) {
2914 /* didn't find a match above... new set..*/
2915 cset = (RF_ConfigSet_t *)
2916 malloc(sizeof(RF_ConfigSet_t),
2917 M_RAIDFRAME, M_NOWAIT);
2918 if (cset == NULL) {
2919 panic("rf_create_auto_sets: No memory!\n");
2920 }
2921 cset->ac = ac;
2922 ac->next = NULL;
2923 cset->next = config_sets;
2924 cset->rootable = 0;
2925 config_sets = cset;
2926 }
2927 }
2928 ac = ac_next;
2929 }
2930
2931
2932 return(config_sets);
2933 }
2934
2935 static int
2936 rf_does_it_fit(cset, ac)
2937 RF_ConfigSet_t *cset;
2938 RF_AutoConfig_t *ac;
2939 {
2940 RF_ComponentLabel_t *clabel1, *clabel2;
2941
2942 /* If this one matches the *first* one in the set, that's good
2943 enough, since the other members of the set would have been
2944 through here too... */
2945 /* note that we are not checking partitionSize here..
2946
2947 Note that we are also not checking the mod_counters here.
2948 	   If everything else matches except the mod_counter, that's
2949 good enough for this test. We will deal with the mod_counters
2950 a little later in the autoconfiguration process.
2951
2952 (clabel1->mod_counter == clabel2->mod_counter) &&
2953
2954 The reason we don't check for this is that failed disks
2955 will have lower modification counts. If those disks are
2956 not added to the set they used to belong to, then they will
2957 form their own set, which may result in 2 different sets,
2958 for example, competing to be configured at raid0, and
2959 perhaps competing to be the root filesystem set. If the
2960 wrong ones get configured, or both attempt to become /,
2961 	   weird behaviour and/or serious lossage will occur. Thus we
2962 need to bring them into the fold here, and kick them out at
2963 a later point.
2964
2965 */
2966
2967 clabel1 = cset->ac->clabel;
2968 clabel2 = ac->clabel;
2969 if ((clabel1->version == clabel2->version) &&
2970 (clabel1->serial_number == clabel2->serial_number) &&
2971 (clabel1->num_rows == clabel2->num_rows) &&
2972 (clabel1->num_columns == clabel2->num_columns) &&
2973 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2974 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2975 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2976 (clabel1->parityConfig == clabel2->parityConfig) &&
2977 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2978 (clabel1->blockSize == clabel2->blockSize) &&
2979 (clabel1->numBlocks == clabel2->numBlocks) &&
2980 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2981 (clabel1->root_partition == clabel2->root_partition) &&
2982 (clabel1->last_unit == clabel2->last_unit) &&
2983 (clabel1->config_order == clabel2->config_order)) {
2984 		/* if it gets here, it almost *has* to be a match */
2985 } else {
2986 /* it's not consistent with somebody in the set..
2987 punt */
2988 return(0);
2989 }
2990 /* all was fine.. it must fit... */
2991 return(1);
2992 }
2993
2994 int
2995 rf_have_enough_components(cset)
2996 RF_ConfigSet_t *cset;
2997 {
2998 RF_AutoConfig_t *ac;
2999 RF_AutoConfig_t *auto_config;
3000 RF_ComponentLabel_t *clabel;
3001 int r,c;
3002 int num_rows;
3003 int num_cols;
3004 int num_missing;
3005 int mod_counter;
3006 int mod_counter_found;
3007 int even_pair_failed;
3008 char parity_type;
3009
3010
3011 /* check to see that we have enough 'live' components
3012 of this set. If so, we can configure it if necessary */
3013
3014 num_rows = cset->ac->clabel->num_rows;
3015 num_cols = cset->ac->clabel->num_columns;
3016 parity_type = cset->ac->clabel->parityConfig;
3017
3018 /* XXX Check for duplicate components!?!?!? */
3019
3020 /* Determine what the mod_counter is supposed to be for this set. */
3021
3022 mod_counter_found = 0;
3023 mod_counter = 0;
3024 ac = cset->ac;
3025 while(ac!=NULL) {
3026 if (mod_counter_found==0) {
3027 mod_counter = ac->clabel->mod_counter;
3028 mod_counter_found = 1;
3029 } else {
3030 if (ac->clabel->mod_counter > mod_counter) {
3031 mod_counter = ac->clabel->mod_counter;
3032 }
3033 }
3034 ac = ac->next;
3035 }
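	/*
	 * The highest mod_counter seen among the labels is taken as the
	 * canonical one for this set; labels carrying an older mod_counter
	 * (e.g. from disks that failed some time ago) will not match in the
	 * search below and are treated as missing.
	 */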
3036
3037 num_missing = 0;
3038 auto_config = cset->ac;
3039
3040 for(r=0; r<num_rows; r++) {
3041 even_pair_failed = 0;
3042 for(c=0; c<num_cols; c++) {
3043 ac = auto_config;
3044 while(ac!=NULL) {
3045 if ((ac->clabel->row == r) &&
3046 (ac->clabel->column == c) &&
3047 (ac->clabel->mod_counter == mod_counter)) {
3048 /* it's this one... */
3049 #if DEBUG
3050 printf("Found: %s at %d,%d\n",
3051 ac->devname,r,c);
3052 #endif
3053 break;
3054 }
3055 ac=ac->next;
3056 }
3057 if (ac==NULL) {
3058 /* Didn't find one here! */
3059 /* special case for RAID 1, especially
3060 where there are more than 2
3061 components (where RAIDframe treats
3062 things a little differently :( ) */
3063 if (parity_type == '1') {
3064 if (c%2 == 0) { /* even component */
3065 even_pair_failed = 1;
3066 } else { /* odd component. If
3067 we're failed, and
3068 so is the even
3069 component, it's
3070 "Good Night, Charlie" */
3071 if (even_pair_failed == 1) {
3072 return(0);
3073 }
3074 }
3075 } else {
3076 /* normal accounting */
3077 num_missing++;
3078 }
3079 }
3080 if ((parity_type == '1') && (c%2 == 1)) {
3081 /* Just did an even component, and we didn't
3082 bail.. reset the even_pair_failed flag,
3083 and go on to the next component.... */
3084 even_pair_failed = 0;
3085 }
3086 }
3087 }
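	/*
	 * Illustration of the RAID 1 special case above: with four
	 * components, columns 0/1 and 2/3 are treated as mirror pairs.
	 * Losing both members of one pair (say 0 and 1) is fatal, while
	 * losing one member from each pair (say 0 and 2) is survivable and
	 * is not counted in num_missing.
	 */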
3088
3089 clabel = cset->ac->clabel;
3090
3091 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3092 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3093 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3094 /* XXX this needs to be made *much* more general */
3095 /* Too many failures */
3096 return(0);
3097 }
3098 /* otherwise, all is well, and we've got enough to take a kick
3099 at autoconfiguring this set */
3100 return(1);
3101 }
3102
3103 void
3104 rf_create_configuration(ac,config,raidPtr)
3105 RF_AutoConfig_t *ac;
3106 RF_Config_t *config;
3107 RF_Raid_t *raidPtr;
3108 {
3109 RF_ComponentLabel_t *clabel;
3110 int i;
3111
3112 clabel = ac->clabel;
3113
3114 /* 1. Fill in the common stuff */
3115 config->numRow = clabel->num_rows;
3116 config->numCol = clabel->num_columns;
3117 config->numSpare = 0; /* XXX should this be set here? */
3118 config->sectPerSU = clabel->sectPerSU;
3119 config->SUsPerPU = clabel->SUsPerPU;
3120 config->SUsPerRU = clabel->SUsPerRU;
3121 config->parityConfig = clabel->parityConfig;
3122 /* XXX... */
3123 strcpy(config->diskQueueType,"fifo");
3124 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3125 config->layoutSpecificSize = 0; /* XXX ?? */
3126
3127 while(ac!=NULL) {
3128 		/* row/col values will be in range due to the checks
3129 		   in rf_reasonable_label() */
3130 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3131 ac->devname);
3132 ac = ac->next;
3133 }
3134
3135 for(i=0;i<RF_MAXDBGV;i++) {
3136 		config->debugVars[i][0] = '\0';
3137 }
3138 }
3139
3140 int
3141 rf_set_autoconfig(raidPtr, new_value)
3142 RF_Raid_t *raidPtr;
3143 int new_value;
3144 {
3145 RF_ComponentLabel_t clabel;
3146 struct vnode *vp;
3147 dev_t dev;
3148 int row, column;
3149
3150 raidPtr->autoconfigure = new_value;
3151 for(row=0; row<raidPtr->numRow; row++) {
3152 for(column=0; column<raidPtr->numCol; column++) {
3153 if (raidPtr->Disks[row][column].status ==
3154 rf_ds_optimal) {
3155 dev = raidPtr->Disks[row][column].dev;
3156 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3157 raidread_component_label(dev, vp, &clabel);
3158 clabel.autoconfigure = new_value;
3159 raidwrite_component_label(dev, vp, &clabel);
3160 }
3161 }
3162 }
3163 return(new_value);
3164 }
3165
3166 int
3167 rf_set_rootpartition(raidPtr, new_value)
3168 RF_Raid_t *raidPtr;
3169 int new_value;
3170 {
3171 RF_ComponentLabel_t clabel;
3172 struct vnode *vp;
3173 dev_t dev;
3174 int row, column;
3175
3176 raidPtr->root_partition = new_value;
3177 for(row=0; row<raidPtr->numRow; row++) {
3178 for(column=0; column<raidPtr->numCol; column++) {
3179 if (raidPtr->Disks[row][column].status ==
3180 rf_ds_optimal) {
3181 dev = raidPtr->Disks[row][column].dev;
3182 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3183 raidread_component_label(dev, vp, &clabel);
3184 clabel.root_partition = new_value;
3185 raidwrite_component_label(dev, vp, &clabel);
3186 }
3187 }
3188 }
3189 return(new_value);
3190 }
3191
3192 void
3193 rf_release_all_vps(cset)
3194 RF_ConfigSet_t *cset;
3195 {
3196 RF_AutoConfig_t *ac;
3197
3198 ac = cset->ac;
3199 while(ac!=NULL) {
3200 /* Close the vp, and give it back */
3201 if (ac->vp) {
3202 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3203 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3204 vput(ac->vp);
3205 ac->vp = NULL;
3206 }
3207 ac = ac->next;
3208 }
3209 }
3210
3211
3212 void
3213 rf_cleanup_config_set(cset)
3214 RF_ConfigSet_t *cset;
3215 {
3216 RF_AutoConfig_t *ac;
3217 RF_AutoConfig_t *next_ac;
3218
3219 ac = cset->ac;
3220 while(ac!=NULL) {
3221 next_ac = ac->next;
3222 /* nuke the label */
3223 free(ac->clabel, M_RAIDFRAME);
3224 /* cleanup the config structure */
3225 free(ac, M_RAIDFRAME);
3226 /* "next.." */
3227 ac = next_ac;
3228 }
3229 /* and, finally, nuke the config set */
3230 free(cset, M_RAIDFRAME);
3231 }
3232
3233
3234 void
3235 raid_init_component_label(raidPtr, clabel)
3236 RF_Raid_t *raidPtr;
3237 RF_ComponentLabel_t *clabel;
3238 {
3239 /* current version number */
3240 clabel->version = RF_COMPONENT_LABEL_VERSION;
3241 clabel->serial_number = raidPtr->serial_number;
3242 clabel->mod_counter = raidPtr->mod_counter;
3243 clabel->num_rows = raidPtr->numRow;
3244 clabel->num_columns = raidPtr->numCol;
3245 clabel->clean = RF_RAID_DIRTY; /* not clean */
3246 clabel->status = rf_ds_optimal; /* "It's good!" */
3247
3248 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3249 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3250 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3251
3252 clabel->blockSize = raidPtr->bytesPerSector;
3253 clabel->numBlocks = raidPtr->sectorsPerDisk;
3254
3255 /* XXX not portable */
3256 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3257 clabel->maxOutstanding = raidPtr->maxOutstanding;
3258 clabel->autoconfigure = raidPtr->autoconfigure;
3259 clabel->root_partition = raidPtr->root_partition;
3260 clabel->last_unit = raidPtr->raidid;
3261 clabel->config_order = raidPtr->config_order;
3262 }
3263
3264 int
3265 rf_auto_config_set(cset,unit)
3266 RF_ConfigSet_t *cset;
3267 int *unit;
3268 {
3269 RF_Raid_t *raidPtr;
3270 RF_Config_t *config;
3271 int raidID;
3272 int retcode;
3273
3274 #if DEBUG
3275 printf("RAID autoconfigure\n");
3276 #endif
3277
3278 retcode = 0;
3279 *unit = -1;
3280
3281 /* 1. Create a config structure */
3282
3283 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3284 M_RAIDFRAME,
3285 M_NOWAIT);
3286 if (config==NULL) {
3287 printf("Out of mem!?!?\n");
3288 /* XXX do something more intelligent here. */
3289 return(1);
3290 }
3291
3292 memset(config, 0, sizeof(RF_Config_t));
3293
3294 /* XXX raidID needs to be set correctly.. */
3295
3296 /*
3297 2. Figure out what RAID ID this one is supposed to live at
3298 See if we can get the same RAID dev that it was configured
3299 on last time..
3300 */
3301
3302 raidID = cset->ac->clabel->last_unit;
3303 if ((raidID < 0) || (raidID >= numraid)) {
3304 /* let's not wander off into lala land. */
3305 raidID = numraid - 1;
3306 }
3307 if (raidPtrs[raidID]->valid != 0) {
3308
3309 /*
3310 Nope... Go looking for an alternative...
3311 Start high so we don't immediately use raid0 if that's
3312 not taken.
3313 */
3314
3315 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3316 if (raidPtrs[raidID]->valid == 0) {
3317 /* can use this one! */
3318 break;
3319 }
3320 }
3321 }
3322
3323 if (raidID < 0) {
3324 /* punt... */
3325 printf("Unable to auto configure this set!\n");
3326 printf("(Out of RAID devs!)\n");
3327 return(1);
3328 }
3329
3330 #if DEBUG
3331 printf("Configuring raid%d:\n",raidID);
3332 #endif
3333
3334 raidPtr = raidPtrs[raidID];
3335
3336 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3337 raidPtr->raidid = raidID;
3338 raidPtr->openings = RAIDOUTSTANDING;
3339
3340 /* 3. Build the configuration structure */
3341 rf_create_configuration(cset->ac, config, raidPtr);
3342
3343 /* 4. Do the configuration */
3344 retcode = rf_Configure(raidPtr, config, cset->ac);
3345
3346 if (retcode == 0) {
3347
3348 raidinit(raidPtrs[raidID]);
3349
3350 rf_markalldirty(raidPtrs[raidID]);
3351 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3352 if (cset->ac->clabel->root_partition==1) {
3353 /* everything configured just fine. Make a note
3354 that this set is eligible to be root. */
3355 cset->rootable = 1;
3356 /* XXX do this here? */
3357 raidPtrs[raidID]->root_partition = 1;
3358 }
3359 }
3360
3361 /* 5. Cleanup */
3362 free(config, M_RAIDFRAME);
3363
3364 *unit = raidID;
3365 return(retcode);
3366 }
3367
3368 void
3369 rf_disk_unbusy(desc)
3370 RF_RaidAccessDesc_t *desc;
3371 {
3372 struct buf *bp;
3373
3374 bp = (struct buf *)desc->bp;
3375 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3376 (bp->b_bcount - bp->b_resid));
3377 }
3378