1 /* $NetBSD: rf_netbsdkintf.c,v 1.112 2001/11/13 07:11:14 lukem Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/cdefs.h>
117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.112 2001/11/13 07:11:14 lukem Exp $");
118
119 #include <sys/errno.h>
120 #include <sys/param.h>
121 #include <sys/pool.h>
122 #include <sys/queue.h>
123 #include <sys/disk.h>
124 #include <sys/device.h>
125 #include <sys/stat.h>
126 #include <sys/ioctl.h>
127 #include <sys/fcntl.h>
128 #include <sys/systm.h>
129 #include <sys/namei.h>
130 #include <sys/vnode.h>
131 #include <sys/param.h>
132 #include <sys/types.h>
133 #include <machine/types.h>
134 #include <sys/disklabel.h>
135 #include <sys/conf.h>
136 #include <sys/lock.h>
137 #include <sys/buf.h>
138 #include <sys/user.h>
139 #include <sys/reboot.h>
140
141 #include <dev/raidframe/raidframevar.h>
142 #include <dev/raidframe/raidframeio.h>
143 #include "raid.h"
144 #include "opt_raid_autoconfig.h"
145 #include "rf_raid.h"
146 #include "rf_copyback.h"
147 #include "rf_dag.h"
148 #include "rf_dagflags.h"
149 #include "rf_desc.h"
150 #include "rf_diskqueue.h"
151 #include "rf_acctrace.h"
152 #include "rf_etimer.h"
153 #include "rf_general.h"
154 #include "rf_debugMem.h"
155 #include "rf_kintf.h"
156 #include "rf_options.h"
157 #include "rf_driver.h"
158 #include "rf_parityscan.h"
159 #include "rf_debugprint.h"
160 #include "rf_threadstuff.h"
161
162 int rf_kdebug_level = 0;
163
164 #ifdef DEBUG
165 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
166 #else /* DEBUG */
167 #define db1_printf(a) { }
168 #endif /* DEBUG */
169
170 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
171
172 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
173
174 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
175 * spare table */
176 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
177 * installation process */
178
179 /* prototypes */
180 static void KernelWakeupFunc(struct buf * bp);
181 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
182 dev_t dev, RF_SectorNum_t startSect,
183 RF_SectorCount_t numSect, caddr_t buf,
184 void (*cbFunc) (struct buf *), void *cbArg,
185 int logBytesPerSector, struct proc * b_proc);
186 static void raidinit(RF_Raid_t *);
187
188 void raidattach(int);
189 int raidsize(dev_t);
190 int raidopen(dev_t, int, int, struct proc *);
191 int raidclose(dev_t, int, int, struct proc *);
192 int raidioctl(dev_t, u_long, caddr_t, int, struct proc *);
193 int raidwrite(dev_t, struct uio *, int);
194 int raidread(dev_t, struct uio *, int);
195 void raidstrategy(struct buf *);
196 int raiddump(dev_t, daddr_t, caddr_t, size_t);
197
198 /*
199 * Pilfered from ccd.c
200 */
201
202 struct raidbuf {
203 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
204 struct buf *rf_obp; /* ptr. to original I/O buf */
205 int rf_flags; /* misc. flags */
206 RF_DiskQueueData_t *req;/* the request that this was part of.. */
207 };
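/* Because rf_buf is the first member, the struct buf * handed to the
 * I/O completion callback can be cast straight back to a struct
 * raidbuf * (see KernelWakeupFunc() below) to recover the original
 * buffer and the RF_DiskQueueData_t request it belongs to. */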
208
209
210 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
211 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
212
213 /* XXX Not sure if the following should be replacing the raidPtrs above,
214 or if it should be used in conjunction with that...
215 */
216
217 struct raid_softc {
218 int sc_flags; /* flags */
219 int sc_cflags; /* configuration flags */
220 size_t sc_size; /* size of the raid device */
221 char sc_xname[20]; /* XXX external name */
222 struct disk sc_dkdev; /* generic disk device info */
223 struct pool sc_cbufpool; /* component buffer pool */
224 struct buf_queue buf_queue; /* used for the device queue */
225 };
226 /* sc_flags */
227 #define RAIDF_INITED 0x01 /* unit has been initialized */
228 #define RAIDF_WLABEL 0x02 /* label area is writable */
229 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
230 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
231 #define RAIDF_LOCKED 0x80 /* unit is locked */
232
233 #define raidunit(x) DISKUNIT(x)
234 int numraid = 0;
235
236 /*
237 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
238 * Be aware that large numbers can allow the driver to consume a lot of
239 * kernel memory, especially on writes, and in degraded mode reads.
240 *
241 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
242 * a single 64K write will typically require 64K for the old data,
243 * 64K for the old parity, and 64K for the new parity, for a total
244 * of 192K (if the parity buffer is not re-used immediately).
245 * Even if it is used immediately, that's still 128K, which when multiplied
246 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
247 *
248 * Now in degraded mode, for example, a 64K read on the above setup may
249 * require data reconstruction, which will require *all* of the 4 remaining
250 * disks to participate -- 4 * 32K/disk == 128K again.
251 */
252
253 #ifndef RAIDOUTSTANDING
254 #define RAIDOUTSTANDING 6
255 #endif
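/* Because of the #ifndef guard above, the default of 6 outstanding
 * I/Os can be overridden at build time (e.g. by defining
 * RAIDOUTSTANDING in the kernel compile flags), trading kernel memory
 * for concurrency as per the sizing discussion above. */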
256
257 #define RAIDLABELDEV(dev) \
258 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
259
260 /* declared here, and made public, for the benefit of KVM stuff.. */
261 struct raid_softc *raid_softc;
262
263 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
264 struct disklabel *);
265 static void raidgetdisklabel(dev_t);
266 static void raidmakedisklabel(struct raid_softc *);
267
268 static int raidlock(struct raid_softc *);
269 static void raidunlock(struct raid_softc *);
270
271 static void rf_markalldirty(RF_Raid_t *);
272 void rf_mountroot_hook(struct device *);
273
274 struct device *raidrootdev;
275
276 void rf_ReconThread(struct rf_recon_req *);
277 /* XXX what I want is: */
278 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
279 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
280 void rf_CopybackThread(RF_Raid_t *raidPtr);
281 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
282 void rf_buildroothack(void *);
283
284 RF_AutoConfig_t *rf_find_raid_components(void);
285 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
286 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
287 static int rf_reasonable_label(RF_ComponentLabel_t *);
288 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
289 int rf_set_autoconfig(RF_Raid_t *, int);
290 int rf_set_rootpartition(RF_Raid_t *, int);
291 void rf_release_all_vps(RF_ConfigSet_t *);
292 void rf_cleanup_config_set(RF_ConfigSet_t *);
293 int rf_have_enough_components(RF_ConfigSet_t *);
294 int rf_auto_config_set(RF_ConfigSet_t *, int *);
295
296 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
297 allow autoconfig to take place.
298 Note that this is overridden by having
299 RAID_AUTOCONFIG as an option in the
300 kernel config file. */
301
302 void
303 raidattach(num)
304 int num;
305 {
306 int raidID;
307 int i, rc;
308 RF_AutoConfig_t *ac_list; /* autoconfig list */
309 RF_ConfigSet_t *config_sets;
310
311 #ifdef DEBUG
312 printf("raidattach: Asked for %d units\n", num);
313 #endif
314
315 if (num <= 0) {
316 #ifdef DIAGNOSTIC
317 panic("raidattach: count <= 0");
318 #endif
319 return;
320 }
321 /* This is where all the initialization stuff gets done. */
322
323 numraid = num;
324
325 /* Make some space for requested number of units... */
326
327 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
328 if (raidPtrs == NULL) {
329 panic("raidPtrs is NULL!!\n");
330 }
331
332 rc = rf_mutex_init(&rf_sparet_wait_mutex);
333 if (rc) {
334 RF_PANIC();
335 }
336
337 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
338
339 for (i = 0; i < num; i++)
340 raidPtrs[i] = NULL;
341 rc = rf_BootRaidframe();
342 if (rc == 0)
343 printf("Kernelized RAIDframe activated\n");
344 else
345 panic("Serious error booting RAID!!\n");
346
347 /* put together some datastructures like the CCD device does.. This
348 * lets us lock the device and what-not when it gets opened. */
349
350 raid_softc = (struct raid_softc *)
351 malloc(num * sizeof(struct raid_softc),
352 M_RAIDFRAME, M_NOWAIT);
353 if (raid_softc == NULL) {
354 printf("WARNING: no memory for RAIDframe driver\n");
355 return;
356 }
357
358 memset(raid_softc, 0, num * sizeof(struct raid_softc));
359
360 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
361 M_RAIDFRAME, M_NOWAIT);
362 if (raidrootdev == NULL) {
363 panic("No memory for RAIDframe driver!!?!?!\n");
364 }
365
366 for (raidID = 0; raidID < num; raidID++) {
367 BUFQ_INIT(&raid_softc[raidID].buf_queue);
368
369 raidrootdev[raidID].dv_class = DV_DISK;
370 raidrootdev[raidID].dv_cfdata = NULL;
371 raidrootdev[raidID].dv_unit = raidID;
372 raidrootdev[raidID].dv_parent = NULL;
373 raidrootdev[raidID].dv_flags = 0;
374 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
375
376 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
377 (RF_Raid_t *));
378 if (raidPtrs[raidID] == NULL) {
379 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
380 numraid = raidID;
381 return;
382 }
383 }
384
385 #if RAID_AUTOCONFIG
386 raidautoconfig = 1;
387 #endif
388
389 if (raidautoconfig) {
390 /* 1. locate all RAID components on the system */
391
392 #if DEBUG
393 printf("Searching for raid components...\n");
394 #endif
395 ac_list = rf_find_raid_components();
396
397 /* 2. sort them into their respective sets */
398
399 config_sets = rf_create_auto_sets(ac_list);
400
401 /* 3. evaluate each set and configure the valid ones
402 This gets done in rf_buildroothack() */
403
404 /* schedule the creation of the thread to do the
405 "/ on RAID" stuff */
406
407 kthread_create(rf_buildroothack,config_sets);
408
409 #if 0
410 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
411 #endif
412 }
413
414 }
415
416 void
417 rf_buildroothack(arg)
418 void *arg;
419 {
420 RF_ConfigSet_t *config_sets = arg;
421 RF_ConfigSet_t *cset;
422 RF_ConfigSet_t *next_cset;
423 int retcode;
424 int raidID;
425 int rootID;
426 int num_root;
427
428 rootID = 0;
429 num_root = 0;
430 cset = config_sets;
431 while(cset != NULL ) {
432 next_cset = cset->next;
433 if (rf_have_enough_components(cset) &&
434 cset->ac->clabel->autoconfigure==1) {
435 retcode = rf_auto_config_set(cset,&raidID);
436 if (!retcode) {
437 if (cset->rootable) {
438 rootID = raidID;
439 num_root++;
440 }
441 } else {
442 /* The autoconfig didn't work :( */
443 #if DEBUG
444 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
445 #endif
446 rf_release_all_vps(cset);
447 }
448 } else {
449 /* we're not autoconfiguring this set...
450 release the associated resources */
451 rf_release_all_vps(cset);
452 }
453 /* cleanup */
454 rf_cleanup_config_set(cset);
455 cset = next_cset;
456 }
457 if (boothowto & RB_ASKNAME) {
458 /* The user asked to be prompted for the root device; leave the choice to them... */
459 } else {
460 /* They didn't ask; if we found exactly one bootable set, use it... */
461
462 if (num_root == 1) {
463 booted_device = &raidrootdev[rootID];
464 } else if (num_root > 1) {
465 /* we can't guess.. require the user to answer... */
466 boothowto |= RB_ASKNAME;
467 }
468 }
469 }
470
471
472 int
473 raidsize(dev)
474 dev_t dev;
475 {
476 struct raid_softc *rs;
477 struct disklabel *lp;
478 int part, unit, omask, size;
479
480 unit = raidunit(dev);
481 if (unit >= numraid)
482 return (-1);
483 rs = &raid_softc[unit];
484
485 if ((rs->sc_flags & RAIDF_INITED) == 0)
486 return (-1);
487
488 part = DISKPART(dev);
489 omask = rs->sc_dkdev.dk_openmask & (1 << part);
490 lp = rs->sc_dkdev.dk_label;
491
492 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
493 return (-1);
494
495 if (lp->d_partitions[part].p_fstype != FS_SWAP)
496 size = -1;
497 else
498 size = lp->d_partitions[part].p_size *
499 (lp->d_secsize / DEV_BSIZE);
500
501 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
502 return (-1);
503
504 return (size);
505
506 }
507
508 int
509 raiddump(dev, blkno, va, size)
510 dev_t dev;
511 daddr_t blkno;
512 caddr_t va;
513 size_t size;
514 {
515 /* Not implemented. */
516 return ENXIO;
517 }
518 /* ARGSUSED */
519 int
520 raidopen(dev, flags, fmt, p)
521 dev_t dev;
522 int flags, fmt;
523 struct proc *p;
524 {
525 int unit = raidunit(dev);
526 struct raid_softc *rs;
527 struct disklabel *lp;
528 int part, pmask;
529 int error = 0;
530
531 if (unit >= numraid)
532 return (ENXIO);
533 rs = &raid_softc[unit];
534
535 if ((error = raidlock(rs)) != 0)
536 return (error);
537 lp = rs->sc_dkdev.dk_label;
538
539 part = DISKPART(dev);
540 pmask = (1 << part);
541
542 db1_printf(("Opening raid device number: %d partition: %d\n",
543 unit, part));
544
545
546 if ((rs->sc_flags & RAIDF_INITED) &&
547 (rs->sc_dkdev.dk_openmask == 0))
548 raidgetdisklabel(dev);
549
550 /* make sure that this partition exists */
551
552 if (part != RAW_PART) {
553 db1_printf(("Not a raw partition..\n"));
554 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
555 ((part >= lp->d_npartitions) ||
556 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
557 error = ENXIO;
558 raidunlock(rs);
559 db1_printf(("Bailing out...\n"));
560 return (error);
561 }
562 }
563 /* Prevent this unit from being unconfigured while open. */
564 switch (fmt) {
565 case S_IFCHR:
566 rs->sc_dkdev.dk_copenmask |= pmask;
567 break;
568
569 case S_IFBLK:
570 rs->sc_dkdev.dk_bopenmask |= pmask;
571 break;
572 }
573
574 if ((rs->sc_dkdev.dk_openmask == 0) &&
575 ((rs->sc_flags & RAIDF_INITED) != 0)) {
576 /* First one... mark things as dirty... Note that we *MUST*
577 have done a configure before this. I DO NOT WANT TO BE
578 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
579 THAT THEY BELONG TOGETHER!!!!! */
580 /* XXX should check to see if we're only open for reading
581 here... If so, we needn't do this, but then need some
582 other way of keeping track of what's happened.. */
583
584 rf_markalldirty( raidPtrs[unit] );
585 }
586
587
588 rs->sc_dkdev.dk_openmask =
589 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
590
591 raidunlock(rs);
592
593 return (error);
594
595
596 }
597 /* ARGSUSED */
598 int
599 raidclose(dev, flags, fmt, p)
600 dev_t dev;
601 int flags, fmt;
602 struct proc *p;
603 {
604 int unit = raidunit(dev);
605 struct raid_softc *rs;
606 int error = 0;
607 int part;
608
609 if (unit >= numraid)
610 return (ENXIO);
611 rs = &raid_softc[unit];
612
613 if ((error = raidlock(rs)) != 0)
614 return (error);
615
616 part = DISKPART(dev);
617
618 /* ...that much closer to allowing unconfiguration... */
619 switch (fmt) {
620 case S_IFCHR:
621 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
622 break;
623
624 case S_IFBLK:
625 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
626 break;
627 }
628 rs->sc_dkdev.dk_openmask =
629 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
630
631 if ((rs->sc_dkdev.dk_openmask == 0) &&
632 ((rs->sc_flags & RAIDF_INITED) != 0)) {
633 /* Last one... the device is still configured, so
634 mark things as clean. (If RAIDF_INITED were not
635 set, device shutdown would already have taken
636 care of setting the clean bits.) */
637 #if 0
638 printf("Last one on raid%d. Updating status.\n",unit);
639 #endif
640 rf_update_component_labels(raidPtrs[unit],
641 RF_FINAL_COMPONENT_UPDATE);
642 if (doing_shutdown) {
643 /* last one, and we're going down, so
644 lights out for this RAID set too. */
645 error = rf_Shutdown(raidPtrs[unit]);
646 pool_destroy(&rs->sc_cbufpool);
647
648 /* It's no longer initialized... */
649 rs->sc_flags &= ~RAIDF_INITED;
650
651 /* Detach the disk. */
652 disk_detach(&rs->sc_dkdev);
653 }
654 }
655
656 raidunlock(rs);
657 return (0);
658
659 }
660
661 void
662 raidstrategy(bp)
663 struct buf *bp;
664 {
665 int s;
666
667 unsigned int raidID = raidunit(bp->b_dev);
668 RF_Raid_t *raidPtr;
669 struct raid_softc *rs = &raid_softc[raidID];
670 struct disklabel *lp;
671 int wlabel;
672
673 if (raidID >= numraid || !raidPtrs[raidID]) {
674 bp->b_error = ENODEV;
675 bp->b_flags |= B_ERROR;
676 bp->b_resid = bp->b_bcount;
677 biodone(bp);
678 return;
679 }
680 if ((rs->sc_flags & RAIDF_INITED) == 0) {
681 bp->b_error = ENXIO;
682 bp->b_flags |= B_ERROR;
683 bp->b_resid = bp->b_bcount;
684 biodone(bp);
685 return;
686 }
687 raidPtr = raidPtrs[raidID];
688 if (!raidPtr->valid) {
689 bp->b_error = ENODEV;
690 bp->b_flags |= B_ERROR;
691 bp->b_resid = bp->b_bcount;
692 biodone(bp);
693 return;
694 }
695 if (bp->b_bcount == 0) {
696 db1_printf(("b_bcount is zero..\n"));
697 biodone(bp);
698 return;
699 }
700 lp = rs->sc_dkdev.dk_label;
701
702 /*
703 * Do bounds checking and adjust transfer. If there's an
704 * error, the bounds check will flag that for us.
705 */
706
707 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
708 if (DISKPART(bp->b_dev) != RAW_PART)
709 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
710 db1_printf(("Bounds check failed!!:%d %d\n",
711 (int) bp->b_blkno, (int) wlabel));
712 biodone(bp);
713 return;
714 }
715 s = splbio();
716
717 bp->b_resid = 0;
718
719 /* stuff it onto our queue */
720 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
721
722 raidstart(raidPtrs[raidID]);
723
724 splx(s);
725 }
726 /* ARGSUSED */
727 int
728 raidread(dev, uio, flags)
729 dev_t dev;
730 struct uio *uio;
731 int flags;
732 {
733 int unit = raidunit(dev);
734 struct raid_softc *rs;
735 int part;
736
737 if (unit >= numraid)
738 return (ENXIO);
739 rs = &raid_softc[unit];
740
741 if ((rs->sc_flags & RAIDF_INITED) == 0)
742 return (ENXIO);
743 part = DISKPART(dev);
744
745 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
746
747 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
748
749 }
750 /* ARGSUSED */
751 int
752 raidwrite(dev, uio, flags)
753 dev_t dev;
754 struct uio *uio;
755 int flags;
756 {
757 int unit = raidunit(dev);
758 struct raid_softc *rs;
759
760 if (unit >= numraid)
761 return (ENXIO);
762 rs = &raid_softc[unit];
763
764 if ((rs->sc_flags & RAIDF_INITED) == 0)
765 return (ENXIO);
766 db1_printf(("raidwrite\n"));
767 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
768
769 }
770
771 int
772 raidioctl(dev, cmd, data, flag, p)
773 dev_t dev;
774 u_long cmd;
775 caddr_t data;
776 int flag;
777 struct proc *p;
778 {
779 int unit = raidunit(dev);
780 int error = 0;
781 int part, pmask;
782 struct raid_softc *rs;
783 RF_Config_t *k_cfg, *u_cfg;
784 RF_Raid_t *raidPtr;
785 RF_RaidDisk_t *diskPtr;
786 RF_AccTotals_t *totals;
787 RF_DeviceConfig_t *d_cfg, **ucfgp;
788 u_char *specific_buf;
789 int retcode = 0;
790 int row;
791 int column;
792 struct rf_recon_req *rrcopy, *rr;
793 RF_ComponentLabel_t *clabel;
794 RF_ComponentLabel_t ci_label;
795 RF_ComponentLabel_t **clabel_ptr;
796 RF_SingleComponent_t *sparePtr,*componentPtr;
797 RF_SingleComponent_t hot_spare;
798 RF_SingleComponent_t component;
799 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
800 int i, j, d;
801 #ifdef __HAVE_OLD_DISKLABEL
802 struct disklabel newlabel;
803 #endif
804
805 if (unit >= numraid)
806 return (ENXIO);
807 rs = &raid_softc[unit];
808 raidPtr = raidPtrs[unit];
809
810 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
811 (int) DISKPART(dev), (int) unit, (int) cmd));
812
813 /* Must be open for writes for these commands... */
814 switch (cmd) {
815 case DIOCSDINFO:
816 case DIOCWDINFO:
817 #ifdef __HAVE_OLD_DISKLABEL
818 case ODIOCWDINFO:
819 case ODIOCSDINFO:
820 #endif
821 case DIOCWLABEL:
822 if ((flag & FWRITE) == 0)
823 return (EBADF);
824 }
825
826 /* Must be initialized for these... */
827 switch (cmd) {
828 case DIOCGDINFO:
829 case DIOCSDINFO:
830 case DIOCWDINFO:
831 #ifdef __HAVE_OLD_DISKLABEL
832 case ODIOCGDINFO:
833 case ODIOCWDINFO:
834 case ODIOCSDINFO:
835 case ODIOCGDEFLABEL:
836 #endif
837 case DIOCGPART:
838 case DIOCWLABEL:
839 case DIOCGDEFLABEL:
840 case RAIDFRAME_SHUTDOWN:
841 case RAIDFRAME_REWRITEPARITY:
842 case RAIDFRAME_GET_INFO:
843 case RAIDFRAME_RESET_ACCTOTALS:
844 case RAIDFRAME_GET_ACCTOTALS:
845 case RAIDFRAME_KEEP_ACCTOTALS:
846 case RAIDFRAME_GET_SIZE:
847 case RAIDFRAME_FAIL_DISK:
848 case RAIDFRAME_COPYBACK:
849 case RAIDFRAME_CHECK_RECON_STATUS:
850 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
851 case RAIDFRAME_GET_COMPONENT_LABEL:
852 case RAIDFRAME_SET_COMPONENT_LABEL:
853 case RAIDFRAME_ADD_HOT_SPARE:
854 case RAIDFRAME_REMOVE_HOT_SPARE:
855 case RAIDFRAME_INIT_LABELS:
856 case RAIDFRAME_REBUILD_IN_PLACE:
857 case RAIDFRAME_CHECK_PARITY:
858 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
859 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
860 case RAIDFRAME_CHECK_COPYBACK_STATUS:
861 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
862 case RAIDFRAME_SET_AUTOCONFIG:
863 case RAIDFRAME_SET_ROOT:
864 case RAIDFRAME_DELETE_COMPONENT:
865 case RAIDFRAME_INCORPORATE_HOT_SPARE:
866 if ((rs->sc_flags & RAIDF_INITED) == 0)
867 return (ENXIO);
868 }
869
870 switch (cmd) {
871
872 /* configure the system */
873 case RAIDFRAME_CONFIGURE:
874
875 if (raidPtr->valid) {
876 /* There is a valid RAID set running on this unit! */
877 printf("raid%d: Device already configured!\n",unit);
878 return(EINVAL);
879 }
880
881 /* copy-in the configuration information */
882 /* data points to a pointer to the configuration structure */
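/* The sequence is: copy in the RF_Config_t that the user pointer
 * refers to, copy in the layout-specific blob (if any) that it in
 * turn points at, hand the result to rf_Configure(), and finish the
 * kernel-side setup in raidinit(). */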
883
884 u_cfg = *((RF_Config_t **) data);
885 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
886 if (k_cfg == NULL) {
887 return (ENOMEM);
888 }
889 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
890 sizeof(RF_Config_t));
891 if (retcode) {
892 RF_Free(k_cfg, sizeof(RF_Config_t));
893 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
894 retcode));
895 return (retcode);
896 }
897 /* allocate a buffer for the layout-specific data, and copy it
898 * in */
899 if (k_cfg->layoutSpecificSize) {
900 if (k_cfg->layoutSpecificSize > 10000) {
901 /* sanity check */
902 RF_Free(k_cfg, sizeof(RF_Config_t));
903 return (EINVAL);
904 }
905 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
906 (u_char *));
907 if (specific_buf == NULL) {
908 RF_Free(k_cfg, sizeof(RF_Config_t));
909 return (ENOMEM);
910 }
911 retcode = copyin(k_cfg->layoutSpecific,
912 (caddr_t) specific_buf,
913 k_cfg->layoutSpecificSize);
914 if (retcode) {
915 RF_Free(k_cfg, sizeof(RF_Config_t));
916 RF_Free(specific_buf,
917 k_cfg->layoutSpecificSize);
918 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
919 retcode));
920 return (retcode);
921 }
922 } else
923 specific_buf = NULL;
924 k_cfg->layoutSpecific = specific_buf;
925
926 /* should do some kind of sanity check on the configuration.
927 * Store the sum of all the bytes in the last byte? */
928
929 /* configure the system */
930
931 /*
932 * Clear the entire RAID descriptor, just to make sure
933 * there is no stale data left in the case of a
934 * reconfiguration
935 */
936 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
937 raidPtr->raidid = unit;
938
939 retcode = rf_Configure(raidPtr, k_cfg, NULL);
940
941 if (retcode == 0) {
942
943 /* allow this many simultaneous IO's to
944 this RAID device */
945 raidPtr->openings = RAIDOUTSTANDING;
946
947 raidinit(raidPtr);
948 rf_markalldirty(raidPtr);
949 }
950 /* free the buffers. No return code here. */
951 if (k_cfg->layoutSpecificSize) {
952 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
953 }
954 RF_Free(k_cfg, sizeof(RF_Config_t));
955
956 return (retcode);
957
958 /* shutdown the system */
959 case RAIDFRAME_SHUTDOWN:
960
961 if ((error = raidlock(rs)) != 0)
962 return (error);
963
964 /*
965 * If somebody has a partition mounted, we shouldn't
966 * shutdown.
967 */
968
969 part = DISKPART(dev);
970 pmask = (1 << part);
971 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
972 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
973 (rs->sc_dkdev.dk_copenmask & pmask))) {
974 raidunlock(rs);
975 return (EBUSY);
976 }
977
978 retcode = rf_Shutdown(raidPtr);
979
980 pool_destroy(&rs->sc_cbufpool);
981
982 /* It's no longer initialized... */
983 rs->sc_flags &= ~RAIDF_INITED;
984
985 /* Detach the disk. */
986 disk_detach(&rs->sc_dkdev);
987
988 raidunlock(rs);
989
990 return (retcode);
991 case RAIDFRAME_GET_COMPONENT_LABEL:
992 clabel_ptr = (RF_ComponentLabel_t **) data;
993 /* need to read the component label for the disk indicated
994 by row,column in clabel */
995
996 /* For practice, let's get it directly from disk, rather
997 than from the in-core copy */
998 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
999 (RF_ComponentLabel_t *));
1000 if (clabel == NULL)
1001 return (ENOMEM);
1002
1003 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1004
1005 retcode = copyin( *clabel_ptr, clabel,
1006 sizeof(RF_ComponentLabel_t));
1007
1008 if (retcode) {
1009 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1010 return(retcode);
1011 }
1012
1013 row = clabel->row;
1014 column = clabel->column;
1015
1016 if ((row < 0) || (row >= raidPtr->numRow) ||
1017 (column < 0) || (column >= raidPtr->numCol +
1018 raidPtr->numSpare)) {
1019 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1020 return(EINVAL);
1021 }
1022
1023 raidread_component_label(raidPtr->Disks[row][column].dev,
1024 raidPtr->raid_cinfo[row][column].ci_vp,
1025 clabel );
1026
1027 retcode = copyout((caddr_t) clabel,
1028 (caddr_t) *clabel_ptr,
1029 sizeof(RF_ComponentLabel_t));
1030 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1031 return (retcode);
1032
1033 case RAIDFRAME_SET_COMPONENT_LABEL:
1034 clabel = (RF_ComponentLabel_t *) data;
1035
1036 /* XXX check the label for valid stuff... */
1037 /* Note that some things *should not* get modified --
1038 the user should be re-initing the labels instead of
1039 trying to patch things.
1040 */
1041
1042 printf("Got component label:\n");
1043 printf("Version: %d\n",clabel->version);
1044 printf("Serial Number: %d\n",clabel->serial_number);
1045 printf("Mod counter: %d\n",clabel->mod_counter);
1046 printf("Row: %d\n", clabel->row);
1047 printf("Column: %d\n", clabel->column);
1048 printf("Num Rows: %d\n", clabel->num_rows);
1049 printf("Num Columns: %d\n", clabel->num_columns);
1050 printf("Clean: %d\n", clabel->clean);
1051 printf("Status: %d\n", clabel->status);
1052
1053 row = clabel->row;
1054 column = clabel->column;
1055
1056 if ((row < 0) || (row >= raidPtr->numRow) ||
1057 (column < 0) || (column >= raidPtr->numCol)) {
1058 return(EINVAL);
1059 }
1060
1061 /* XXX this isn't allowed to do anything for now :-) */
1062
1063 /* XXX and before it is, we need to fill in the rest
1064 of the fields!?!?!?! */
1065 #if 0
1066 raidwrite_component_label(
1067 raidPtr->Disks[row][column].dev,
1068 raidPtr->raid_cinfo[row][column].ci_vp,
1069 clabel );
1070 #endif
1071 return (0);
1072
1073 case RAIDFRAME_INIT_LABELS:
1074 clabel = (RF_ComponentLabel_t *) data;
1075 /*
1076 we only want the serial number from
1077 the above. We get all the rest of the information
1078 from the config that was used to create this RAID
1079 set.
1080 */
1081
1082 raidPtr->serial_number = clabel->serial_number;
1083
1084 raid_init_component_label(raidPtr, &ci_label);
1085 ci_label.serial_number = clabel->serial_number;
1086
1087 for(row=0;row<raidPtr->numRow;row++) {
1088 ci_label.row = row;
1089 for(column=0;column<raidPtr->numCol;column++) {
1090 diskPtr = &raidPtr->Disks[row][column];
1091 if (!RF_DEAD_DISK(diskPtr->status)) {
1092 ci_label.partitionSize = diskPtr->partitionSize;
1093 ci_label.column = column;
1094 raidwrite_component_label(
1095 raidPtr->Disks[row][column].dev,
1096 raidPtr->raid_cinfo[row][column].ci_vp,
1097 &ci_label );
1098 }
1099 }
1100 }
1101
1102 return (retcode);
1103 case RAIDFRAME_SET_AUTOCONFIG:
1104 d = rf_set_autoconfig(raidPtr, *(int *) data);
1105 printf("New autoconfig value is: %d\n", d);
1106 *(int *) data = d;
1107 return (retcode);
1108
1109 case RAIDFRAME_SET_ROOT:
1110 d = rf_set_rootpartition(raidPtr, *(int *) data);
1111 printf("New rootpartition value is: %d\n", d);
1112 *(int *) data = d;
1113 return (retcode);
1114
1115 /* initialize all parity */
1116 case RAIDFRAME_REWRITEPARITY:
1117
1118 if (raidPtr->Layout.map->faultsTolerated == 0) {
1119 /* Parity for RAID 0 is trivially correct */
1120 raidPtr->parity_good = RF_RAID_CLEAN;
1121 return(0);
1122 }
1123
1124 if (raidPtr->parity_rewrite_in_progress == 1) {
1125 /* Re-write is already in progress! */
1126 return(EINVAL);
1127 }
1128
1129 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1130 rf_RewriteParityThread,
1131 raidPtr,"raid_parity");
1132 return (retcode);
1133
1134
1135 case RAIDFRAME_ADD_HOT_SPARE:
1136 sparePtr = (RF_SingleComponent_t *) data;
1137 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1138 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1139 return(retcode);
1140
1141 case RAIDFRAME_REMOVE_HOT_SPARE:
1142 return(retcode);
1143
1144 case RAIDFRAME_DELETE_COMPONENT:
1145 componentPtr = (RF_SingleComponent_t *)data;
1146 memcpy( &component, componentPtr,
1147 sizeof(RF_SingleComponent_t));
1148 retcode = rf_delete_component(raidPtr, &component);
1149 return(retcode);
1150
1151 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1152 componentPtr = (RF_SingleComponent_t *)data;
1153 memcpy( &component, componentPtr,
1154 sizeof(RF_SingleComponent_t));
1155 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1156 return(retcode);
1157
1158 case RAIDFRAME_REBUILD_IN_PLACE:
1159
1160 if (raidPtr->Layout.map->faultsTolerated == 0) {
1161 /* Can't do this on a RAID 0!! */
1162 return(EINVAL);
1163 }
1164
1165 if (raidPtr->recon_in_progress == 1) {
1166 /* a reconstruct is already in progress! */
1167 return(EINVAL);
1168 }
1169
1170 componentPtr = (RF_SingleComponent_t *) data;
1171 memcpy( &component, componentPtr,
1172 sizeof(RF_SingleComponent_t));
1173 row = component.row;
1174 column = component.column;
1175 printf("Rebuild: %d %d\n",row, column);
1176 if ((row < 0) || (row >= raidPtr->numRow) ||
1177 (column < 0) || (column >= raidPtr->numCol)) {
1178 return(EINVAL);
1179 }
1180
1181 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1182 if (rrcopy == NULL)
1183 return(ENOMEM);
1184
1185 rrcopy->raidPtr = (void *) raidPtr;
1186 rrcopy->row = row;
1187 rrcopy->col = column;
1188
1189 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1190 rf_ReconstructInPlaceThread,
1191 rrcopy,"raid_reconip");
1192 return(retcode);
1193
1194 case RAIDFRAME_GET_INFO:
1195 if (!raidPtr->valid)
1196 return (ENODEV);
1197 ucfgp = (RF_DeviceConfig_t **) data;
1198 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1199 (RF_DeviceConfig_t *));
1200 if (d_cfg == NULL)
1201 return (ENOMEM);
1202 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1203 d_cfg->rows = raidPtr->numRow;
1204 d_cfg->cols = raidPtr->numCol;
1205 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1206 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1207 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1208 return (ENOMEM);
1209 }
1210 d_cfg->nspares = raidPtr->numSpare;
1211 if (d_cfg->nspares >= RF_MAX_DISKS) {
1212 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1213 return (ENOMEM);
1214 }
1215 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1216 d = 0;
1217 for (i = 0; i < d_cfg->rows; i++) {
1218 for (j = 0; j < d_cfg->cols; j++) {
1219 d_cfg->devs[d] = raidPtr->Disks[i][j];
1220 d++;
1221 }
1222 }
1223 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1224 d_cfg->spares[i] = raidPtr->Disks[0][j];
1225 }
1226 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1227 sizeof(RF_DeviceConfig_t));
1228 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1229
1230 return (retcode);
1231
1232 case RAIDFRAME_CHECK_PARITY:
1233 *(int *) data = raidPtr->parity_good;
1234 return (0);
1235
1236 case RAIDFRAME_RESET_ACCTOTALS:
1237 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1238 return (0);
1239
1240 case RAIDFRAME_GET_ACCTOTALS:
1241 totals = (RF_AccTotals_t *) data;
1242 *totals = raidPtr->acc_totals;
1243 return (0);
1244
1245 case RAIDFRAME_KEEP_ACCTOTALS:
1246 raidPtr->keep_acc_totals = *(int *)data;
1247 return (0);
1248
1249 case RAIDFRAME_GET_SIZE:
1250 *(int *) data = raidPtr->totalSectors;
1251 return (0);
1252
1253 /* fail a disk & optionally start reconstruction */
1254 case RAIDFRAME_FAIL_DISK:
1255
1256 if (raidPtr->Layout.map->faultsTolerated == 0) {
1257 /* Can't do this on a RAID 0!! */
1258 return(EINVAL);
1259 }
1260
1261 rr = (struct rf_recon_req *) data;
1262
1263 if (rr->row < 0 || rr->row >= raidPtr->numRow
1264 || rr->col < 0 || rr->col >= raidPtr->numCol)
1265 return (EINVAL);
1266
1267 printf("raid%d: Failing the disk: row: %d col: %d\n",
1268 unit, rr->row, rr->col);
1269
1270 /* make a copy of the recon request so that we don't rely on
1271 * the user's buffer */
1272 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1273 if (rrcopy == NULL)
1274 return(ENOMEM);
1275 bcopy(rr, rrcopy, sizeof(*rr));
1276 rrcopy->raidPtr = (void *) raidPtr;
1277
1278 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1279 rf_ReconThread,
1280 rrcopy,"raid_recon");
1281 return (0);
1282
1283 /* invoke a copyback operation after recon on whatever disk
1284 * needs it, if any */
1285 case RAIDFRAME_COPYBACK:
1286
1287 if (raidPtr->Layout.map->faultsTolerated == 0) {
1288 /* This makes no sense on a RAID 0!! */
1289 return(EINVAL);
1290 }
1291
1292 if (raidPtr->copyback_in_progress == 1) {
1293 /* Copyback is already in progress! */
1294 return(EINVAL);
1295 }
1296
1297 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1298 rf_CopybackThread,
1299 raidPtr,"raid_copyback");
1300 return (retcode);
1301
1302 /* return the percentage completion of reconstruction */
1303 case RAIDFRAME_CHECK_RECON_STATUS:
1304 if (raidPtr->Layout.map->faultsTolerated == 0) {
1305 /* This makes no sense on a RAID 0, so tell the
1306 user it's done. */
1307 *(int *) data = 100;
1308 return(0);
1309 }
1310 row = 0; /* XXX we only consider a single row... */
1311 if (raidPtr->status[row] != rf_rs_reconstructing)
1312 *(int *) data = 100;
1313 else
1314 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1315 return (0);
1316 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1317 progressInfoPtr = (RF_ProgressInfo_t **) data;
1318 row = 0; /* XXX we only consider a single row... */
1319 if (raidPtr->status[row] != rf_rs_reconstructing) {
1320 progressInfo.remaining = 0;
1321 progressInfo.completed = 100;
1322 progressInfo.total = 100;
1323 } else {
1324 progressInfo.total =
1325 raidPtr->reconControl[row]->numRUsTotal;
1326 progressInfo.completed =
1327 raidPtr->reconControl[row]->numRUsComplete;
1328 progressInfo.remaining = progressInfo.total -
1329 progressInfo.completed;
1330 }
1331 retcode = copyout((caddr_t) &progressInfo,
1332 (caddr_t) *progressInfoPtr,
1333 sizeof(RF_ProgressInfo_t));
1334 return (retcode);
1335
1336 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1337 if (raidPtr->Layout.map->faultsTolerated == 0) {
1338 /* This makes no sense on a RAID 0, so tell the
1339 user it's done. */
1340 *(int *) data = 100;
1341 return(0);
1342 }
1343 if (raidPtr->parity_rewrite_in_progress == 1) {
1344 *(int *) data = 100 *
1345 raidPtr->parity_rewrite_stripes_done /
1346 raidPtr->Layout.numStripe;
1347 } else {
1348 *(int *) data = 100;
1349 }
1350 return (0);
1351
1352 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1353 progressInfoPtr = (RF_ProgressInfo_t **) data;
1354 if (raidPtr->parity_rewrite_in_progress == 1) {
1355 progressInfo.total = raidPtr->Layout.numStripe;
1356 progressInfo.completed =
1357 raidPtr->parity_rewrite_stripes_done;
1358 progressInfo.remaining = progressInfo.total -
1359 progressInfo.completed;
1360 } else {
1361 progressInfo.remaining = 0;
1362 progressInfo.completed = 100;
1363 progressInfo.total = 100;
1364 }
1365 retcode = copyout((caddr_t) &progressInfo,
1366 (caddr_t) *progressInfoPtr,
1367 sizeof(RF_ProgressInfo_t));
1368 return (retcode);
1369
1370 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1371 if (raidPtr->Layout.map->faultsTolerated == 0) {
1372 /* This makes no sense on a RAID 0 */
1373 *(int *) data = 100;
1374 return(0);
1375 }
1376 if (raidPtr->copyback_in_progress == 1) {
1377 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1378 raidPtr->Layout.numStripe;
1379 } else {
1380 *(int *) data = 100;
1381 }
1382 return (0);
1383
1384 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1385 progressInfoPtr = (RF_ProgressInfo_t **) data;
1386 if (raidPtr->copyback_in_progress == 1) {
1387 progressInfo.total = raidPtr->Layout.numStripe;
1388 progressInfo.completed =
1389 raidPtr->copyback_stripes_done;
1390 progressInfo.remaining = progressInfo.total -
1391 progressInfo.completed;
1392 } else {
1393 progressInfo.remaining = 0;
1394 progressInfo.completed = 100;
1395 progressInfo.total = 100;
1396 }
1397 retcode = copyout((caddr_t) &progressInfo,
1398 (caddr_t) *progressInfoPtr,
1399 sizeof(RF_ProgressInfo_t));
1400 return (retcode);
1401
1402 /* the sparetable daemon calls this to wait for the kernel to
1403 * need a spare table. this ioctl does not return until a
1404 * spare table is needed. XXX -- calling mpsleep here in the
1405 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1406 * -- I should either compute the spare table in the kernel,
1407 * or have a different -- XXX XXX -- interface (a different
1408 * character device) for delivering the table -- XXX */
1409 #if 0
1410 case RAIDFRAME_SPARET_WAIT:
1411 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1412 while (!rf_sparet_wait_queue)
1413 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1414 waitreq = rf_sparet_wait_queue;
1415 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1416 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1417
1418 /* structure assignment */
1419 *((RF_SparetWait_t *) data) = *waitreq;
1420
1421 RF_Free(waitreq, sizeof(*waitreq));
1422 return (0);
1423
1424 /* wakes up a process waiting on SPARET_WAIT and puts an error
1425 * code in it that will cause the daemon to exit */
1426 case RAIDFRAME_ABORT_SPARET_WAIT:
1427 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1428 waitreq->fcol = -1;
1429 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1430 waitreq->next = rf_sparet_wait_queue;
1431 rf_sparet_wait_queue = waitreq;
1432 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1433 wakeup(&rf_sparet_wait_queue);
1434 return (0);
1435
1436 /* used by the spare table daemon to deliver a spare table
1437 * into the kernel */
1438 case RAIDFRAME_SEND_SPARET:
1439
1440 /* install the spare table */
1441 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1442
1443 /* respond to the requestor. the return status of the spare
1444 * table installation is passed in the "fcol" field */
1445 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1446 waitreq->fcol = retcode;
1447 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1448 waitreq->next = rf_sparet_resp_queue;
1449 rf_sparet_resp_queue = waitreq;
1450 wakeup(&rf_sparet_resp_queue);
1451 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1452
1453 return (retcode);
1454 #endif
1455
1456 default:
1457 break; /* fall through to the os-specific code below */
1458
1459 }
1460
1461 if (!raidPtr->valid)
1462 return (EINVAL);
1463
1464 /*
1465 * Add support for "regular" device ioctls here.
1466 */
1467
1468 switch (cmd) {
1469 case DIOCGDINFO:
1470 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1471 break;
1472 #ifdef __HAVE_OLD_DISKLABEL
1473 case ODIOCGDINFO:
1474 newlabel = *(rs->sc_dkdev.dk_label);
1475 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1476 return ENOTTY;
1477 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1478 break;
1479 #endif
1480
1481 case DIOCGPART:
1482 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1483 ((struct partinfo *) data)->part =
1484 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1485 break;
1486
1487 case DIOCWDINFO:
1488 case DIOCSDINFO:
1489 #ifdef __HAVE_OLD_DISKLABEL
1490 case ODIOCWDINFO:
1491 case ODIOCSDINFO:
1492 #endif
1493 {
1494 struct disklabel *lp;
1495 #ifdef __HAVE_OLD_DISKLABEL
1496 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1497 memset(&newlabel, 0, sizeof newlabel);
1498 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1499 lp = &newlabel;
1500 } else
1501 #endif
1502 lp = (struct disklabel *)data;
1503
1504 if ((error = raidlock(rs)) != 0)
1505 return (error);
1506
1507 rs->sc_flags |= RAIDF_LABELLING;
1508
1509 error = setdisklabel(rs->sc_dkdev.dk_label,
1510 lp, 0, rs->sc_dkdev.dk_cpulabel);
1511 if (error == 0) {
1512 if (cmd == DIOCWDINFO
1513 #ifdef __HAVE_OLD_DISKLABEL
1514 || cmd == ODIOCWDINFO
1515 #endif
1516 )
1517 error = writedisklabel(RAIDLABELDEV(dev),
1518 raidstrategy, rs->sc_dkdev.dk_label,
1519 rs->sc_dkdev.dk_cpulabel);
1520 }
1521 rs->sc_flags &= ~RAIDF_LABELLING;
1522
1523 raidunlock(rs);
1524
1525 if (error)
1526 return (error);
1527 break;
1528 }
1529
1530 case DIOCWLABEL:
1531 if (*(int *) data != 0)
1532 rs->sc_flags |= RAIDF_WLABEL;
1533 else
1534 rs->sc_flags &= ~RAIDF_WLABEL;
1535 break;
1536
1537 case DIOCGDEFLABEL:
1538 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1539 break;
1540
1541 #ifdef __HAVE_OLD_DISKLABEL
1542 case ODIOCGDEFLABEL:
1543 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1544 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1545 return ENOTTY;
1546 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1547 break;
1548 #endif
1549
1550 default:
1551 retcode = ENOTTY;
1552 }
1553 return (retcode);
1554
1555 }
1556
1557
1558 /* raidinit -- complete the rest of the initialization for the
1559 RAIDframe device. */
1560
1561
1562 static void
1563 raidinit(raidPtr)
1564 RF_Raid_t *raidPtr;
1565 {
1566 struct raid_softc *rs;
1567 int unit;
1568
1569 unit = raidPtr->raidid;
1570
1571 rs = &raid_softc[unit];
1572 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1573 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1574
1575
1576 /* XXX should check return code first... */
1577 rs->sc_flags |= RAIDF_INITED;
1578
1579 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1580
1581 rs->sc_dkdev.dk_name = rs->sc_xname;
1582
1583 /* disk_attach actually creates space for the CPU disklabel, among
1584 * other things, so it's critical to call this *BEFORE* we try putzing
1585 * with disklabels. */
1586
1587 disk_attach(&rs->sc_dkdev);
1588
1589 /* XXX There may be a weird interaction here between this, and
1590 * protectedSectors, as used in RAIDframe. */
1591
1592 rs->sc_size = raidPtr->totalSectors;
1593
1594 }
1595
1596 /* wake up the daemon & tell it to get us a spare table
1597 * XXX
1598 * the entries in the queues should be tagged with the raidPtr
1599 * so that in the extremely rare case that two recons happen at once,
1600 * we know for which device we're requesting a spare table
1601 * XXX
1602 *
1603 * XXX This code is not currently used. GO
1604 */
1605 int
1606 rf_GetSpareTableFromDaemon(req)
1607 RF_SparetWait_t *req;
1608 {
1609 int retcode;
1610
1611 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1612 req->next = rf_sparet_wait_queue;
1613 rf_sparet_wait_queue = req;
1614 wakeup(&rf_sparet_wait_queue);
1615
1616 /* XXX tsleep() does not drop the mutex the way mpsleep() did */
1617 while (!rf_sparet_resp_queue) {
1618 tsleep(&rf_sparet_resp_queue, PRIBIO,
1619 "raidframe getsparetable", 0);
1620 }
1621 req = rf_sparet_resp_queue;
1622 rf_sparet_resp_queue = req->next;
1623 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1624
1625 retcode = req->fcol;
1626 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1627 * alloc'd */
1628 return (retcode);
1629 }
1630
1631 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1632 * bp & passes it down.
1633 * any calls originating in the kernel must use non-blocking I/O
1634 * do some extra sanity checking to return "appropriate" error values for
1635 * certain conditions (to make some standard utilities work)
1636 *
1637 * Formerly known as: rf_DoAccessKernel
1638 */
1639 void
1640 raidstart(raidPtr)
1641 RF_Raid_t *raidPtr;
1642 {
1643 RF_SectorCount_t num_blocks, pb, sum;
1644 RF_RaidAddr_t raid_addr;
1645 int retcode;
1646 struct partition *pp;
1647 daddr_t blocknum;
1648 int unit;
1649 struct raid_softc *rs;
1650 int do_async;
1651 struct buf *bp;
1652
1653 unit = raidPtr->raidid;
1654 rs = &raid_softc[unit];
1655
1656 /* quick check to see if anything has died recently */
1657 RF_LOCK_MUTEX(raidPtr->mutex);
1658 if (raidPtr->numNewFailures > 0) {
1659 rf_update_component_labels(raidPtr,
1660 RF_NORMAL_COMPONENT_UPDATE);
1661 raidPtr->numNewFailures--;
1662 }
1663 RF_UNLOCK_MUTEX(raidPtr->mutex);
1664
1665 /* Check to see if we're at the limit... */
1666 RF_LOCK_MUTEX(raidPtr->mutex);
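/* The `while' condition below is always evaluated with raidPtr->mutex
 * held; the mutex is dropped while each request is set up and
 * re-acquired (including on the error-path `continue's) before
 * looping back. */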
1667 while (raidPtr->openings > 0) {
1668 RF_UNLOCK_MUTEX(raidPtr->mutex);
1669
1670 /* get the next item, if any, from the queue */
1671 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1672 /* nothing more to do */
1673 return;
1674 }
1675 BUFQ_REMOVE(&rs->buf_queue, bp);
1676
1677 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1678 * partition.. Need to make it absolute to the underlying
1679 * device.. */
1680
1681 blocknum = bp->b_blkno;
1682 if (DISKPART(bp->b_dev) != RAW_PART) {
1683 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1684 blocknum += pp->p_offset;
1685 }
1686
1687 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1688 (int) blocknum));
1689
1690 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1691 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1692
1693 /* *THIS* is where we adjust what block we're going to...
1694 * but DO NOT TOUCH bp->b_blkno!!! */
1695 raid_addr = blocknum;
1696
1697 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1698 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1699 sum = raid_addr + num_blocks + pb;
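/* sum is the first sector past the end of this request; besides the
 * straightforward bounds test, the (sum < ...) comparisons below
 * catch arithmetic wrap-around. */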
1700 if (rf_debugKernelAccess) {
1701 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1702 (int) raid_addr, (int) sum, (int) num_blocks,
1703 (int) pb, (int) bp->b_resid));
1704 }
1705 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1706 || (sum < num_blocks) || (sum < pb)) {
1707 bp->b_error = ENOSPC;
1708 bp->b_flags |= B_ERROR;
1709 bp->b_resid = bp->b_bcount;
1710 biodone(bp);
1711 RF_LOCK_MUTEX(raidPtr->mutex);
1712 continue;
1713 }
1714 /*
1715 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1716 */
1717
1718 if (bp->b_bcount & raidPtr->sectorMask) {
1719 bp->b_error = EINVAL;
1720 bp->b_flags |= B_ERROR;
1721 bp->b_resid = bp->b_bcount;
1722 biodone(bp);
1723 RF_LOCK_MUTEX(raidPtr->mutex);
1724 continue;
1725
1726 }
1727 db1_printf(("Calling DoAccess..\n"));
1728
1729
1730 RF_LOCK_MUTEX(raidPtr->mutex);
1731 raidPtr->openings--;
1732 RF_UNLOCK_MUTEX(raidPtr->mutex);
1733
1734 /*
1735 * Everything is async.
1736 */
1737 do_async = 1;
1738
1739 disk_busy(&rs->sc_dkdev);
1740
1741 /* XXX we're still at splbio() here... do we *really*
1742 need to be? */
1743
1744 /* don't ever condition on bp->b_flags & B_WRITE.
1745 * always condition on B_READ instead */
1746
1747 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1748 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1749 do_async, raid_addr, num_blocks,
1750 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1751
1752 RF_LOCK_MUTEX(raidPtr->mutex);
1753 }
1754 RF_UNLOCK_MUTEX(raidPtr->mutex);
1755 }
1756
1757
1758
1759
1760 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1761
1762 int
1763 rf_DispatchKernelIO(queue, req)
1764 RF_DiskQueue_t *queue;
1765 RF_DiskQueueData_t *req;
1766 {
1767 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1768 struct buf *bp;
1769 struct raidbuf *raidbp = NULL;
1770 struct raid_softc *rs;
1771 int unit;
1772 int s;
1773
1774 s=0;
1775 /* s = splbio();*/ /* want to test this */
1776 /* XXX along with the vnode, we also need the softc associated with
1777 * this device.. */
1778
1779 req->queue = queue;
1780
1781 unit = queue->raidPtr->raidid;
1782
1783 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1784
1785 if (unit >= numraid) {
1786 printf("Invalid unit number: %d %d\n", unit, numraid);
1787 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1788 }
1789 rs = &raid_softc[unit];
1790
1791 bp = req->bp;
1792 #if 1
1793 /* XXX when there is a physical disk failure, someone is passing us a
1794 * buffer that contains old stuff!! Attempt to deal with this problem
1795 * without taking a performance hit... (not sure where the real bug
1796 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1797
1798 if (bp->b_flags & B_ERROR) {
1799 bp->b_flags &= ~B_ERROR;
1800 }
1801 if (bp->b_error != 0) {
1802 bp->b_error = 0;
1803 }
1804 #endif
1805 raidbp = RAIDGETBUF(rs);
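/* XXX RAIDGETBUF() uses PR_NOWAIT, so pool_get() can return NULL
 * under memory pressure; that case is not handled here. */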
1806
1807 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1808
1809 /*
1810 * context for raidiodone
1811 */
1812 raidbp->rf_obp = bp;
1813 raidbp->req = req;
1814
1815 LIST_INIT(&raidbp->rf_buf.b_dep);
1816
1817 switch (req->type) {
1818 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1819 /* XXX need to do something extra here.. */
1820 /* I'm leaving this in, as I've never actually seen it used,
1821 * and I'd like folks to report it... GO */
1822 printf("WAKEUP CALLED\n");
1823 queue->numOutstanding++;
1824
1825 /* XXX need to glue the original buffer into this?? */
1826
1827 KernelWakeupFunc(&raidbp->rf_buf);
1828 break;
1829
1830 case RF_IO_TYPE_READ:
1831 case RF_IO_TYPE_WRITE:
1832
1833 if (req->tracerec) {
1834 RF_ETIMER_START(req->tracerec->timer);
1835 }
1836 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1837 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1838 req->sectorOffset, req->numSector,
1839 req->buf, KernelWakeupFunc, (void *) req,
1840 queue->raidPtr->logBytesPerSector, req->b_proc);
1841
1842 if (rf_debugKernelAccess) {
1843 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1844 (long) bp->b_blkno));
1845 }
1846 queue->numOutstanding++;
1847 queue->last_deq_sector = req->sectorOffset;
1848 /* acc wouldn't have been let in if there were any pending
1849 * reqs at any other priority */
1850 queue->curPriority = req->priority;
1851
1852 db1_printf(("Going for %c to unit %d row %d col %d\n",
1853 req->type, unit, queue->row, queue->col));
1854 db1_printf(("sector %d count %d (%d bytes) %d\n",
1855 (int) req->sectorOffset, (int) req->numSector,
1856 (int) (req->numSector <<
1857 queue->raidPtr->logBytesPerSector),
1858 (int) queue->raidPtr->logBytesPerSector));
1859 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1860 raidbp->rf_buf.b_vp->v_numoutput++;
1861 }
1862 VOP_STRATEGY(&raidbp->rf_buf);
1863
1864 break;
1865
1866 default:
1867 panic("bad req->type in rf_DispatchKernelIO");
1868 }
1869 db1_printf(("Exiting from DispatchKernelIO\n"));
1870 /* splx(s); */ /* want to test this */
1871 return (0);
1872 }
1873 /* this is the callback function associated with an I/O invoked from
1874 kernel code.
1875 */
1876 static void
1877 KernelWakeupFunc(vbp)
1878 struct buf *vbp;
1879 {
1880 RF_DiskQueueData_t *req = NULL;
1881 RF_DiskQueue_t *queue;
1882 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1883 struct buf *bp;
1884 struct raid_softc *rs;
1885 int unit;
1886 int s;
1887
1888 s = splbio();
1889 db1_printf(("recovering the request queue:\n"));
1890 req = raidbp->req;
1891
1892 bp = raidbp->rf_obp;
1893
1894 queue = (RF_DiskQueue_t *) req->queue;
1895
1896 if (raidbp->rf_buf.b_flags & B_ERROR) {
1897 bp->b_flags |= B_ERROR;
1898 bp->b_error = raidbp->rf_buf.b_error ?
1899 raidbp->rf_buf.b_error : EIO;
1900 }
1901
1902 /* XXX methinks this could be wrong... */
1903 #if 1
1904 bp->b_resid = raidbp->rf_buf.b_resid;
1905 #endif
1906
1907 if (req->tracerec) {
1908 RF_ETIMER_STOP(req->tracerec->timer);
1909 RF_ETIMER_EVAL(req->tracerec->timer);
1910 RF_LOCK_MUTEX(rf_tracing_mutex);
1911 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1912 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1913 req->tracerec->num_phys_ios++;
1914 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1915 }
1916 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1917
1918 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1919
1920
1921 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1922 * ballistic, and mark the component as hosed... */
1923
1924 if (bp->b_flags & B_ERROR) {
1925 /* Mark the disk as dead */
1926 /* but only mark it once... */
1927 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1928 rf_ds_optimal) {
1929 printf("raid%d: IO Error. Marking %s as failed.\n",
1930 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1931 queue->raidPtr->Disks[queue->row][queue->col].status =
1932 rf_ds_failed;
1933 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1934 queue->raidPtr->numFailures++;
1935 queue->raidPtr->numNewFailures++;
1936 } else { /* Disk is already dead... */
1937 /* printf("Disk already marked as dead!\n"); */
1938 }
1939
1940 }
1941
1942 rs = &raid_softc[unit];
1943 RAIDPUTBUF(rs, raidbp);
1944
1945 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1946 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1947
1948 splx(s);
1949 }
1950
1951
1952
1953 /*
1954 * initialize a buf structure for doing an I/O in the kernel.
1955 */
1956 static void
1957 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1958 logBytesPerSector, b_proc)
1959 struct buf *bp;
1960 struct vnode *b_vp;
1961 unsigned rw_flag;
1962 dev_t dev;
1963 RF_SectorNum_t startSect;
1964 RF_SectorCount_t numSect;
1965 caddr_t buf;
1966 void (*cbFunc) (struct buf *);
1967 void *cbArg;
1968 int logBytesPerSector;
1969 struct proc *b_proc;
1970 {
1971 /* bp->b_flags = B_PHYS | rw_flag; */
1972 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1973 bp->b_bcount = numSect << logBytesPerSector;
1974 bp->b_bufsize = bp->b_bcount;
1975 bp->b_error = 0;
1976 bp->b_dev = dev;
1977 bp->b_data = buf;
1978 bp->b_blkno = startSect;
1979 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1980 if (bp->b_bcount == 0) {
1981 panic("bp->b_bcount is zero in InitBP!!\n");
1982 }
1983 bp->b_proc = b_proc;
1984 bp->b_iodone = cbFunc;
1985 bp->b_vp = b_vp;
1986
1987 }
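/*
 * Rough shape of the I/O path in this file: rf_DispatchKernelIO() fills in
 * a struct buf via InitBP() with KernelWakeupFunc() as the b_iodone
 * callback (hence B_CALL above), then hands it to VOP_STRATEGY().  When
 * the underlying driver finishes the I/O and biodone() runs, that callback
 * propagates any error and residual count back to the original buffer and
 * notifies RAIDframe through rf_DiskIOComplete() and req->CompleteFunc.
 */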
1988
1989 static void
1990 raidgetdefaultlabel(raidPtr, rs, lp)
1991 RF_Raid_t *raidPtr;
1992 struct raid_softc *rs;
1993 struct disklabel *lp;
1994 {
1995 db1_printf(("Building a default label...\n"));
1996 memset(lp, 0, sizeof(*lp));
1997
1998 /* fabricate a label... */
1999 lp->d_secperunit = raidPtr->totalSectors;
2000 lp->d_secsize = raidPtr->bytesPerSector;
2001 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2002 lp->d_ntracks = 4 * raidPtr->numCol;
2003 lp->d_ncylinders = raidPtr->totalSectors /
2004 (lp->d_nsectors * lp->d_ntracks);
2005 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2006
2007 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2008 lp->d_type = DTYPE_RAID;
2009 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2010 lp->d_rpm = 3600;
2011 lp->d_interleave = 1;
2012 lp->d_flags = 0;
2013
2014 lp->d_partitions[RAW_PART].p_offset = 0;
2015 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2016 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2017 lp->d_npartitions = RAW_PART + 1;
2018
2019 lp->d_magic = DISKMAGIC;
2020 lp->d_magic2 = DISKMAGIC;
2021 	lp->d_checksum = dkcksum(lp);
2022
2023 }
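/*
 * The geometry fabricated above is synthetic: a RAID set has no real
 * tracks or cylinders, so these values presumably only need to be
 * self-consistent (d_secpercyl = d_ntracks * d_nsectors) and to multiply
 * out to roughly raidPtr->totalSectors; d_secperunit and d_secsize are
 * the fields that describe real properties of the device.
 */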
2024 /*
2025 * Read the disklabel from the raid device. If one is not present, fake one
2026 * up.
2027 */
2028 static void
2029 raidgetdisklabel(dev)
2030 dev_t dev;
2031 {
2032 int unit = raidunit(dev);
2033 struct raid_softc *rs = &raid_softc[unit];
2034 char *errstring;
2035 struct disklabel *lp = rs->sc_dkdev.dk_label;
2036 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2037 RF_Raid_t *raidPtr;
2038
2039 db1_printf(("Getting the disklabel...\n"));
2040
2041 memset(clp, 0, sizeof(*clp));
2042
2043 raidPtr = raidPtrs[unit];
2044
2045 raidgetdefaultlabel(raidPtr, rs, lp);
2046
2047 /*
2048 * Call the generic disklabel extraction routine.
2049 */
2050 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2051 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2052 if (errstring)
2053 raidmakedisklabel(rs);
2054 else {
2055 int i;
2056 struct partition *pp;
2057
2058 /*
2059 * Sanity check whether the found disklabel is valid.
2060 *
2061 * This is necessary since total size of the raid device
2062 * may vary when an interleave is changed even though exactly
2063 		 * the same components are used, and an old disklabel may be
2064 		 * used if one is found.
2065 */
2066 if (lp->d_secperunit != rs->sc_size)
2067 printf("WARNING: %s: "
2068 "total sector size in disklabel (%d) != "
2069 "the size of raid (%ld)\n", rs->sc_xname,
2070 lp->d_secperunit, (long) rs->sc_size);
2071 for (i = 0; i < lp->d_npartitions; i++) {
2072 pp = &lp->d_partitions[i];
2073 if (pp->p_offset + pp->p_size > rs->sc_size)
2074 printf("WARNING: %s: end of partition `%c' "
2075 "exceeds the size of raid (%ld)\n",
2076 rs->sc_xname, 'a' + i, (long) rs->sc_size);
2077 }
2078 }
2079
2080 }
2081 /*
2082 * Take care of things one might want to take care of in the event
2083 * that a disklabel isn't present.
2084 */
2085 static void
2086 raidmakedisklabel(rs)
2087 struct raid_softc *rs;
2088 {
2089 struct disklabel *lp = rs->sc_dkdev.dk_label;
2090 db1_printf(("Making a label..\n"));
2091
2092 /*
2093 * For historical reasons, if there's no disklabel present
2094 * the raw partition must be marked FS_BSDFFS.
2095 */
2096
2097 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2098
2099 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2100
2101 lp->d_checksum = dkcksum(lp);
2102 }
2103 /*
2104 * Lookup the provided name in the filesystem. If the file exists,
2105 * is a valid block device, and isn't being used by anyone else,
2106 * set *vpp to the file's vnode.
2107 * You'll find the original of this in ccd.c
2108 */
2109 int
2110 raidlookup(path, p, vpp)
2111 char *path;
2112 struct proc *p;
2113 struct vnode **vpp; /* result */
2114 {
2115 struct nameidata nd;
2116 struct vnode *vp;
2117 struct vattr va;
2118 int error;
2119
2120 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2121 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2122 #ifdef DEBUG
2123 printf("RAIDframe: vn_open returned %d\n", error);
2124 #endif
2125 return (error);
2126 }
2127 vp = nd.ni_vp;
2128 if (vp->v_usecount > 1) {
2129 VOP_UNLOCK(vp, 0);
2130 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2131 return (EBUSY);
2132 }
2133 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2134 VOP_UNLOCK(vp, 0);
2135 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2136 return (error);
2137 }
2138 /* XXX: eventually we should handle VREG, too. */
2139 if (va.va_type != VBLK) {
2140 VOP_UNLOCK(vp, 0);
2141 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2142 return (ENOTBLK);
2143 }
2144 VOP_UNLOCK(vp, 0);
2145 *vpp = vp;
2146 return (0);
2147 }
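/*
 * Note that raidlookup() deliberately rejects anything that is already in
 * use (v_usecount > 1) and anything that is not a block device, so a
 * component can neither be opened twice nor handed in as a regular file
 * (see the VREG XXX above).
 */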
2148 /*
2149 * Wait interruptibly for an exclusive lock.
2150 *
2151 * XXX
2152 * Several drivers do this; it should be abstracted and made MP-safe.
2153 * (Hmm... where have we seen this warning before :-> GO )
2154 */
2155 static int
2156 raidlock(rs)
2157 struct raid_softc *rs;
2158 {
2159 int error;
2160
2161 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2162 rs->sc_flags |= RAIDF_WANTED;
2163 if ((error =
2164 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2165 return (error);
2166 }
2167 rs->sc_flags |= RAIDF_LOCKED;
2168 return (0);
2169 }
2170 /*
2171 * Unlock and wake up any waiters.
2172 */
2173 static void
2174 raidunlock(rs)
2175 struct raid_softc *rs;
2176 {
2177
2178 rs->sc_flags &= ~RAIDF_LOCKED;
2179 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2180 rs->sc_flags &= ~RAIDF_WANTED;
2181 wakeup(rs);
2182 }
2183 }
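/*
 * Taken together, raidlock()/raidunlock() form a simple sleep lock:
 * RAIDF_LOCKED is the lock bit, RAIDF_WANTED notes that someone is asleep
 * waiting for it, and the PCATCH in the tsleep() above means a waiter can
 * be interrupted by a signal, in which case raidlock() returns that error
 * rather than the lock.
 */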
2184
2185
2186 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2187 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
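/*
 * A worked example of the numbers used below: with the usual DEV_BSIZE of
 * 512, RF_COMPONENT_INFO_OFFSET / DEV_BSIZE = 16384 / 512 = 32, so the
 * component label is read and written at block 32 of each component,
 * presumably within the reserved area that the rf_protected_sectors XXX in
 * raidread_component_label() worries about.  The transfer always uses a
 * RF_COMPONENT_INFO_SIZE (1024) byte buffer, of which only the first
 * sizeof(RF_ComponentLabel_t) bytes are meaningful.
 */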
2188
2189 int
2190 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2191 {
2192 RF_ComponentLabel_t clabel;
2193 raidread_component_label(dev, b_vp, &clabel);
2194 clabel.mod_counter = mod_counter;
2195 clabel.clean = RF_RAID_CLEAN;
2196 raidwrite_component_label(dev, b_vp, &clabel);
2197 return(0);
2198 }
2199
2200
2201 int
2202 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2203 {
2204 RF_ComponentLabel_t clabel;
2205 raidread_component_label(dev, b_vp, &clabel);
2206 clabel.mod_counter = mod_counter;
2207 clabel.clean = RF_RAID_DIRTY;
2208 raidwrite_component_label(dev, b_vp, &clabel);
2209 return(0);
2210 }
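/*
 * raidmarkclean() and raidmarkdirty() are read-modify-write helpers: each
 * reads the existing component label, updates only the clean flag and the
 * mod_counter, and writes the label back.  The mod_counter recorded here
 * is what rf_have_enough_components() uses later on to pick out the most
 * recently written labels when autoconfiguring a set.
 */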
2211
2212 /* ARGSUSED */
2213 int
2214 raidread_component_label(dev, b_vp, clabel)
2215 dev_t dev;
2216 struct vnode *b_vp;
2217 RF_ComponentLabel_t *clabel;
2218 {
2219 struct buf *bp;
2220 int error;
2221
2222 /* XXX should probably ensure that we don't try to do this if
2223 someone has changed rf_protected_sectors. */
2224
2225 if (b_vp == NULL) {
2226 /* For whatever reason, this component is not valid.
2227 Don't try to read a component label from it. */
2228 return(EINVAL);
2229 }
2230
2231 /* get a block of the appropriate size... */
2232 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2233 bp->b_dev = dev;
2234
2235 /* get our ducks in a row for the read */
2236 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2237 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2238 bp->b_flags |= B_READ;
2239 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2240
2241 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2242
2243 error = biowait(bp);
2244
2245 if (!error) {
2246 memcpy(clabel, bp->b_data,
2247 sizeof(RF_ComponentLabel_t));
2248 #if 0
2249 rf_print_component_label( clabel );
2250 #endif
2251 } else {
2252 #if 0
2253 printf("Failed to read RAID component label!\n");
2254 #endif
2255 }
2256
2257 brelse(bp);
2258 return(error);
2259 }
2260 /* ARGSUSED */
2261 int
2262 raidwrite_component_label(dev, b_vp, clabel)
2263 dev_t dev;
2264 struct vnode *b_vp;
2265 RF_ComponentLabel_t *clabel;
2266 {
2267 struct buf *bp;
2268 int error;
2269
2270 /* get a block of the appropriate size... */
2271 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2272 bp->b_dev = dev;
2273
2274 /* get our ducks in a row for the write */
2275 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2276 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2277 bp->b_flags |= B_WRITE;
2278 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2279
2280 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2281
2282 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2283
2284 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2285 error = biowait(bp);
2286 brelse(bp);
2287 if (error) {
2288 #if 1
2289 printf("Failed to write RAID component info!\n");
2290 #endif
2291 }
2292
2293 return(error);
2294 }
2295
2296 void
2297 rf_markalldirty(raidPtr)
2298 RF_Raid_t *raidPtr;
2299 {
2300 RF_ComponentLabel_t clabel;
2301 int r,c;
2302
2303 raidPtr->mod_counter++;
2304 for (r = 0; r < raidPtr->numRow; r++) {
2305 for (c = 0; c < raidPtr->numCol; c++) {
2306 /* we don't want to touch (at all) a disk that has
2307 failed */
2308 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2309 raidread_component_label(
2310 raidPtr->Disks[r][c].dev,
2311 raidPtr->raid_cinfo[r][c].ci_vp,
2312 &clabel);
2313 if (clabel.status == rf_ds_spared) {
2314 /* XXX do something special...
2315 but whatever you do, don't
2316 try to access it!! */
2317 } else {
2318 #if 0
2319 clabel.status =
2320 raidPtr->Disks[r][c].status;
2321 raidwrite_component_label(
2322 raidPtr->Disks[r][c].dev,
2323 raidPtr->raid_cinfo[r][c].ci_vp,
2324 &clabel);
2325 #endif
2326 raidmarkdirty(
2327 raidPtr->Disks[r][c].dev,
2328 raidPtr->raid_cinfo[r][c].ci_vp,
2329 raidPtr->mod_counter);
2330 }
2331 }
2332 }
2333 }
2334 /* printf("Component labels marked dirty.\n"); */
2335 #if 0
2336 for( c = 0; c < raidPtr->numSpare ; c++) {
2337 sparecol = raidPtr->numCol + c;
2338 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2339 /*
2340
2341 XXX this is where we get fancy and map this spare
2342 		into its correct spot in the array.
2343
2344 */
2345 /*
2346
2347 we claim this disk is "optimal" if it's
2348 rf_ds_used_spare, as that means it should be
2349 directly substitutable for the disk it replaced.
2350 We note that too...
2351
2352 */
2353
2354 for(i=0;i<raidPtr->numRow;i++) {
2355 for(j=0;j<raidPtr->numCol;j++) {
2356 if ((raidPtr->Disks[i][j].spareRow ==
2357 r) &&
2358 (raidPtr->Disks[i][j].spareCol ==
2359 sparecol)) {
2360 srow = r;
2361 scol = sparecol;
2362 break;
2363 }
2364 }
2365 }
2366
2367 raidread_component_label(
2368 raidPtr->Disks[r][sparecol].dev,
2369 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2370 &clabel);
2371 /* make sure status is noted */
2372 clabel.version = RF_COMPONENT_LABEL_VERSION;
2373 clabel.mod_counter = raidPtr->mod_counter;
2374 clabel.serial_number = raidPtr->serial_number;
2375 clabel.row = srow;
2376 clabel.column = scol;
2377 clabel.num_rows = raidPtr->numRow;
2378 clabel.num_columns = raidPtr->numCol;
2379 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2380 clabel.status = rf_ds_optimal;
2381 raidwrite_component_label(
2382 raidPtr->Disks[r][sparecol].dev,
2383 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2384 &clabel);
2385 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2386 			    raidPtr->raid_cinfo[r][sparecol].ci_vp, raidPtr->mod_counter);
2387 }
2388 }
2389
2390 #endif
2391 }
2392
2393
2394 void
2395 rf_update_component_labels(raidPtr, final)
2396 RF_Raid_t *raidPtr;
2397 int final;
2398 {
2399 RF_ComponentLabel_t clabel;
2400 int sparecol;
2401 int r,c;
2402 int i,j;
2403 int srow, scol;
2404
2405 srow = -1;
2406 scol = -1;
2407
2408 /* XXX should do extra checks to make sure things really are clean,
2409 rather than blindly setting the clean bit... */
2410
2411 raidPtr->mod_counter++;
2412
2413 for (r = 0; r < raidPtr->numRow; r++) {
2414 for (c = 0; c < raidPtr->numCol; c++) {
2415 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2416 raidread_component_label(
2417 raidPtr->Disks[r][c].dev,
2418 raidPtr->raid_cinfo[r][c].ci_vp,
2419 &clabel);
2420 /* make sure status is noted */
2421 clabel.status = rf_ds_optimal;
2422 /* bump the counter */
2423 clabel.mod_counter = raidPtr->mod_counter;
2424
2425 raidwrite_component_label(
2426 raidPtr->Disks[r][c].dev,
2427 raidPtr->raid_cinfo[r][c].ci_vp,
2428 &clabel);
2429 if (final == RF_FINAL_COMPONENT_UPDATE) {
2430 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2431 raidmarkclean(
2432 raidPtr->Disks[r][c].dev,
2433 raidPtr->raid_cinfo[r][c].ci_vp,
2434 raidPtr->mod_counter);
2435 }
2436 }
2437 }
2438 /* else we don't touch it.. */
2439 }
2440 }
2441
2442 for( c = 0; c < raidPtr->numSpare ; c++) {
2443 sparecol = raidPtr->numCol + c;
2444 /* Need to ensure that the reconstruct actually completed! */
2445 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2446 /*
2447
2448 we claim this disk is "optimal" if it's
2449 rf_ds_used_spare, as that means it should be
2450 directly substitutable for the disk it replaced.
2451 We note that too...
2452
2453 */
2454
2455 for(i=0;i<raidPtr->numRow;i++) {
2456 for(j=0;j<raidPtr->numCol;j++) {
2457 if ((raidPtr->Disks[i][j].spareRow ==
2458 0) &&
2459 (raidPtr->Disks[i][j].spareCol ==
2460 sparecol)) {
2461 srow = i;
2462 scol = j;
2463 break;
2464 }
2465 }
2466 }
2467
2468 /* XXX shouldn't *really* need this... */
2469 raidread_component_label(
2470 raidPtr->Disks[0][sparecol].dev,
2471 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2472 &clabel);
2473 /* make sure status is noted */
2474
2475 raid_init_component_label(raidPtr, &clabel);
2476
2477 clabel.mod_counter = raidPtr->mod_counter;
2478 clabel.row = srow;
2479 clabel.column = scol;
2480 clabel.status = rf_ds_optimal;
2481
2482 raidwrite_component_label(
2483 raidPtr->Disks[0][sparecol].dev,
2484 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2485 &clabel);
2486 if (final == RF_FINAL_COMPONENT_UPDATE) {
2487 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2488 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2489 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2490 raidPtr->mod_counter);
2491 }
2492 }
2493 }
2494 }
2495 /* printf("Component labels updated\n"); */
2496 }
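/*
 * Note the two-stage behaviour above: every call bumps mod_counter and
 * rewrites the labels of the optimal components and used spares, but the
 * clean bit is only set when this is the final update
 * (RF_FINAL_COMPONENT_UPDATE) and parity_good says the parity is known to
 * be good, presumably so that anything short of a clean shutdown leaves
 * the labels dirty and parity gets checked the next time around.
 */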
2497
2498 void
2499 rf_close_component(raidPtr, vp, auto_configured)
2500 RF_Raid_t *raidPtr;
2501 struct vnode *vp;
2502 int auto_configured;
2503 {
2504 struct proc *p;
2505
2506 p = raidPtr->engine_thread;
2507
2508 if (vp != NULL) {
2509 if (auto_configured == 1) {
2510 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2511 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2512 vput(vp);
2513
2514 } else {
2515 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2516 }
2517 } else {
2518 printf("vnode was NULL\n");
2519 }
2520 }
2521
2522
2523 void
2524 rf_UnconfigureVnodes(raidPtr)
2525 RF_Raid_t *raidPtr;
2526 {
2527 int r,c;
2528 struct proc *p;
2529 struct vnode *vp;
2530 int acd;
2531
2532
2533 /* We take this opportunity to close the vnodes like we should.. */
2534
2535 p = raidPtr->engine_thread;
2536
2537 for (r = 0; r < raidPtr->numRow; r++) {
2538 for (c = 0; c < raidPtr->numCol; c++) {
2539 printf("Closing vnode for row: %d col: %d\n", r, c);
2540 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2541 acd = raidPtr->Disks[r][c].auto_configured;
2542 rf_close_component(raidPtr, vp, acd);
2543 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2544 raidPtr->Disks[r][c].auto_configured = 0;
2545 }
2546 }
2547 for (r = 0; r < raidPtr->numSpare; r++) {
2548 printf("Closing vnode for spare: %d\n", r);
2549 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2550 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2551 rf_close_component(raidPtr, vp, acd);
2552 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2553 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2554 }
2555 }
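/*
 * Note the layout assumed above: spares live in row 0, in the columns
 * immediately following the real components (numCol .. numCol+numSpare-1),
 * which matches how rf_update_component_labels() addresses them.
 */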
2556
2557
2558 void
2559 rf_ReconThread(req)
2560 struct rf_recon_req *req;
2561 {
2562 int s;
2563 RF_Raid_t *raidPtr;
2564
2565 s = splbio();
2566 raidPtr = (RF_Raid_t *) req->raidPtr;
2567 raidPtr->recon_in_progress = 1;
2568
2569 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2570 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2571
2572 /* XXX get rid of this! we don't need it at all.. */
2573 RF_Free(req, sizeof(*req));
2574
2575 raidPtr->recon_in_progress = 0;
2576 splx(s);
2577
2578 /* That's all... */
2579 kthread_exit(0); /* does not return */
2580 }
2581
2582 void
2583 rf_RewriteParityThread(raidPtr)
2584 RF_Raid_t *raidPtr;
2585 {
2586 int retcode;
2587 int s;
2588
2589 raidPtr->parity_rewrite_in_progress = 1;
2590 s = splbio();
2591 retcode = rf_RewriteParity(raidPtr);
2592 splx(s);
2593 if (retcode) {
2594 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2595 } else {
2596 /* set the clean bit! If we shutdown correctly,
2597 the clean bit on each component label will get
2598 set */
2599 raidPtr->parity_good = RF_RAID_CLEAN;
2600 }
2601 raidPtr->parity_rewrite_in_progress = 0;
2602
2603 /* Anyone waiting for us to stop? If so, inform them... */
2604 if (raidPtr->waitShutdown) {
2605 wakeup(&raidPtr->parity_rewrite_in_progress);
2606 }
2607
2608 /* That's all... */
2609 kthread_exit(0); /* does not return */
2610 }
2611
2612
2613 void
2614 rf_CopybackThread(raidPtr)
2615 RF_Raid_t *raidPtr;
2616 {
2617 int s;
2618
2619 raidPtr->copyback_in_progress = 1;
2620 s = splbio();
2621 rf_CopybackReconstructedData(raidPtr);
2622 splx(s);
2623 raidPtr->copyback_in_progress = 0;
2624
2625 /* That's all... */
2626 kthread_exit(0); /* does not return */
2627 }
2628
2629
2630 void
2631 rf_ReconstructInPlaceThread(req)
2632 struct rf_recon_req *req;
2633 {
2634 int retcode;
2635 int s;
2636 RF_Raid_t *raidPtr;
2637
2638 s = splbio();
2639 raidPtr = req->raidPtr;
2640 raidPtr->recon_in_progress = 1;
2641 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2642 RF_Free(req, sizeof(*req));
2643 raidPtr->recon_in_progress = 0;
2644 splx(s);
2645
2646 /* That's all... */
2647 kthread_exit(0); /* does not return */
2648 }
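/*
 * The four thread bodies above (rf_ReconThread, rf_RewriteParityThread,
 * rf_CopybackThread and rf_ReconstructInPlaceThread) run as kernel threads
 * (note the kthread_exit() calls); each sets its *_in_progress flag for
 * the duration of the operation, and the parity rewrite thread also wakes
 * anyone sleeping on &raidPtr->parity_rewrite_in_progress when a shutdown
 * is waiting.
 */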
2649
2650 void
2651 rf_mountroot_hook(dev)
2652 struct device *dev;
2653 {
2654
2655 }
2656
2657
2658 RF_AutoConfig_t *
2659 rf_find_raid_components()
2660 {
2661 struct devnametobdevmaj *dtobdm;
2662 struct vnode *vp;
2663 struct disklabel label;
2664 struct device *dv;
2665 char *cd_name;
2666 dev_t dev;
2667 int error;
2668 int i;
2669 int good_one;
2670 RF_ComponentLabel_t *clabel;
2671 RF_AutoConfig_t *ac_list;
2672 RF_AutoConfig_t *ac;
2673
2674
2675 /* initialize the AutoConfig list */
2676 ac_list = NULL;
2677
2678 /* we begin by trolling through *all* the devices on the system */
2679
2680 for (dv = alldevs.tqh_first; dv != NULL;
2681 dv = dv->dv_list.tqe_next) {
2682
2683 /* we are only interested in disks... */
2684 if (dv->dv_class != DV_DISK)
2685 continue;
2686
2687 /* we don't care about floppies... */
2688 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2689 continue;
2690 }
2691
2692 /* need to find the device_name_to_block_device_major stuff */
2693 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2694 dtobdm = dev_name2blk;
2695 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2696 dtobdm++;
2697 }
2698
2699 /* get a vnode for the raw partition of this disk */
2700
2701 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2702 if (bdevvp(dev, &vp))
2703 panic("RAID can't alloc vnode");
2704
2705 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2706
2707 if (error) {
2708 			/* "Who cares." Continue looking
2709 			   for something that exists */
2710 vput(vp);
2711 continue;
2712 }
2713
2714 /* Ok, the disk exists. Go get the disklabel. */
2715 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2716 FREAD, NOCRED, 0);
2717 if (error) {
2718 /*
2719 * XXX can't happen - open() would
2720 * have errored out (or faked up one)
2721 */
2722 printf("can't get label for dev %s%c (%d)!?!?\n",
2723 dv->dv_xname, 'a' + RAW_PART, error);
2724 }
2725
2726 /* don't need this any more. We'll allocate it again
2727 a little later if we really do... */
2728 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2729 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2730 vput(vp);
2731
2732 for (i=0; i < label.d_npartitions; i++) {
2733 /* We only support partitions marked as RAID */
2734 if (label.d_partitions[i].p_fstype != FS_RAID)
2735 continue;
2736
2737 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2738 if (bdevvp(dev, &vp))
2739 panic("RAID can't alloc vnode");
2740
2741 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2742 if (error) {
2743 /* Whatever... */
2744 vput(vp);
2745 continue;
2746 }
2747
2748 good_one = 0;
2749
2750 clabel = (RF_ComponentLabel_t *)
2751 malloc(sizeof(RF_ComponentLabel_t),
2752 M_RAIDFRAME, M_NOWAIT);
2753 if (clabel == NULL) {
2754 /* XXX CLEANUP HERE */
2755 printf("RAID auto config: out of memory!\n");
2756 return(NULL); /* XXX probably should panic? */
2757 }
2758
2759 if (!raidread_component_label(dev, vp, clabel)) {
2760 /* Got the label. Does it look reasonable? */
2761 if (rf_reasonable_label(clabel) &&
2762 (clabel->partitionSize <=
2763 label.d_partitions[i].p_size)) {
2764 #if DEBUG
2765 printf("Component on: %s%c: %d\n",
2766 dv->dv_xname, 'a'+i,
2767 label.d_partitions[i].p_size);
2768 rf_print_component_label(clabel);
2769 #endif
2770 /* if it's reasonable, add it,
2771 else ignore it. */
2772 ac = (RF_AutoConfig_t *)
2773 malloc(sizeof(RF_AutoConfig_t),
2774 M_RAIDFRAME,
2775 M_NOWAIT);
2776 if (ac == NULL) {
2777 /* XXX should panic?? */
2778 return(NULL);
2779 }
2780
2781 sprintf(ac->devname, "%s%c",
2782 dv->dv_xname, 'a'+i);
2783 ac->dev = dev;
2784 ac->vp = vp;
2785 ac->clabel = clabel;
2786 ac->next = ac_list;
2787 ac_list = ac;
2788 good_one = 1;
2789 }
2790 }
2791 if (!good_one) {
2792 /* cleanup */
2793 free(clabel, M_RAIDFRAME);
2794 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2795 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2796 vput(vp);
2797 }
2798 }
2799 }
2800 return(ac_list);
2801 }
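/*
 * Summary of the walk above: for every non-floppy disk the raw partition
 * is opened just long enough to fetch its disklabel, then each FS_RAID
 * partition is opened and its component label read.  Partitions whose
 * label passes rf_reasonable_label() and fits within the partition are
 * added to ac_list with their vnodes left open (those get released later,
 * e.g. by rf_release_all_vps() or rf_close_component()); everything else
 * is closed and freed on the spot.
 */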
2802
2803 static int
2804 rf_reasonable_label(clabel)
2805 RF_ComponentLabel_t *clabel;
2806 {
2807
2808 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2809 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2810 ((clabel->clean == RF_RAID_CLEAN) ||
2811 (clabel->clean == RF_RAID_DIRTY)) &&
2812 clabel->row >=0 &&
2813 clabel->column >= 0 &&
2814 clabel->num_rows > 0 &&
2815 clabel->num_columns > 0 &&
2816 clabel->row < clabel->num_rows &&
2817 clabel->column < clabel->num_columns &&
2818 clabel->blockSize > 0 &&
2819 clabel->numBlocks > 0) {
2820 /* label looks reasonable enough... */
2821 return(1);
2822 }
2823 return(0);
2824 }
2825
2826
2827 void
2828 rf_print_component_label(clabel)
2829 RF_ComponentLabel_t *clabel;
2830 {
2831 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2832 clabel->row, clabel->column,
2833 clabel->num_rows, clabel->num_columns);
2834 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2835 clabel->version, clabel->serial_number,
2836 clabel->mod_counter);
2837 printf(" Clean: %s Status: %d\n",
2838 clabel->clean ? "Yes" : "No", clabel->status );
2839 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2840 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2841 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2842 (char) clabel->parityConfig, clabel->blockSize,
2843 clabel->numBlocks);
2844 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2845 printf(" Contains root partition: %s\n",
2846 clabel->root_partition ? "Yes" : "No" );
2847 printf(" Last configured as: raid%d\n", clabel->last_unit );
2848 #if 0
2849 printf(" Config order: %d\n", clabel->config_order);
2850 #endif
2851
2852 }
2853
2854 RF_ConfigSet_t *
2855 rf_create_auto_sets(ac_list)
2856 RF_AutoConfig_t *ac_list;
2857 {
2858 RF_AutoConfig_t *ac;
2859 RF_ConfigSet_t *config_sets;
2860 RF_ConfigSet_t *cset;
2861 RF_AutoConfig_t *ac_next;
2862
2863
2864 config_sets = NULL;
2865
2866 /* Go through the AutoConfig list, and figure out which components
2867 belong to what sets. */
2868 ac = ac_list;
2869 while(ac!=NULL) {
2870 /* we're going to putz with ac->next, so save it here
2871 for use at the end of the loop */
2872 ac_next = ac->next;
2873
2874 if (config_sets == NULL) {
2875 /* will need at least this one... */
2876 config_sets = (RF_ConfigSet_t *)
2877 malloc(sizeof(RF_ConfigSet_t),
2878 M_RAIDFRAME, M_NOWAIT);
2879 if (config_sets == NULL) {
2880 panic("rf_create_auto_sets: No memory!\n");
2881 }
2882 /* this one is easy :) */
2883 config_sets->ac = ac;
2884 config_sets->next = NULL;
2885 config_sets->rootable = 0;
2886 ac->next = NULL;
2887 } else {
2888 /* which set does this component fit into? */
2889 cset = config_sets;
2890 while(cset!=NULL) {
2891 if (rf_does_it_fit(cset, ac)) {
2892 /* looks like it matches... */
2893 ac->next = cset->ac;
2894 cset->ac = ac;
2895 break;
2896 }
2897 cset = cset->next;
2898 }
2899 if (cset==NULL) {
2900 /* didn't find a match above... new set..*/
2901 cset = (RF_ConfigSet_t *)
2902 malloc(sizeof(RF_ConfigSet_t),
2903 M_RAIDFRAME, M_NOWAIT);
2904 if (cset == NULL) {
2905 panic("rf_create_auto_sets: No memory!\n");
2906 }
2907 cset->ac = ac;
2908 ac->next = NULL;
2909 cset->next = config_sets;
2910 cset->rootable = 0;
2911 config_sets = cset;
2912 }
2913 }
2914 ac = ac_next;
2915 }
2916
2917
2918 return(config_sets);
2919 }
2920
2921 static int
2922 rf_does_it_fit(cset, ac)
2923 RF_ConfigSet_t *cset;
2924 RF_AutoConfig_t *ac;
2925 {
2926 RF_ComponentLabel_t *clabel1, *clabel2;
2927
2928 /* If this one matches the *first* one in the set, that's good
2929 enough, since the other members of the set would have been
2930 through here too... */
2931 /* note that we are not checking partitionSize here..
2932
2933 Note that we are also not checking the mod_counters here.
2934 	   If everything else matches except the mod_counter, that's
2935 good enough for this test. We will deal with the mod_counters
2936 a little later in the autoconfiguration process.
2937
2938 (clabel1->mod_counter == clabel2->mod_counter) &&
2939
2940 The reason we don't check for this is that failed disks
2941 will have lower modification counts. If those disks are
2942 not added to the set they used to belong to, then they will
2943 form their own set, which may result in 2 different sets,
2944 for example, competing to be configured at raid0, and
2945 perhaps competing to be the root filesystem set. If the
2946 wrong ones get configured, or both attempt to become /,
2947 	   weird behaviour and/or serious lossage will occur. Thus we
2948 need to bring them into the fold here, and kick them out at
2949 a later point.
2950
2951 */
2952
2953 clabel1 = cset->ac->clabel;
2954 clabel2 = ac->clabel;
2955 if ((clabel1->version == clabel2->version) &&
2956 (clabel1->serial_number == clabel2->serial_number) &&
2957 (clabel1->num_rows == clabel2->num_rows) &&
2958 (clabel1->num_columns == clabel2->num_columns) &&
2959 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2960 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2961 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2962 (clabel1->parityConfig == clabel2->parityConfig) &&
2963 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2964 (clabel1->blockSize == clabel2->blockSize) &&
2965 (clabel1->numBlocks == clabel2->numBlocks) &&
2966 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2967 (clabel1->root_partition == clabel2->root_partition) &&
2968 (clabel1->last_unit == clabel2->last_unit) &&
2969 (clabel1->config_order == clabel2->config_order)) {
2970 		/* if it gets here, it almost *has* to be a match */
2971 } else {
2972 /* it's not consistent with somebody in the set..
2973 punt */
2974 return(0);
2975 }
2976 /* all was fine.. it must fit... */
2977 return(1);
2978 }
2979
2980 int
2981 rf_have_enough_components(cset)
2982 RF_ConfigSet_t *cset;
2983 {
2984 RF_AutoConfig_t *ac;
2985 RF_AutoConfig_t *auto_config;
2986 RF_ComponentLabel_t *clabel;
2987 int r,c;
2988 int num_rows;
2989 int num_cols;
2990 int num_missing;
2991 int mod_counter;
2992 int mod_counter_found;
2993 int even_pair_failed;
2994 char parity_type;
2995
2996
2997 /* check to see that we have enough 'live' components
2998 of this set. If so, we can configure it if necessary */
2999
3000 num_rows = cset->ac->clabel->num_rows;
3001 num_cols = cset->ac->clabel->num_columns;
3002 parity_type = cset->ac->clabel->parityConfig;
3003
3004 /* XXX Check for duplicate components!?!?!? */
3005
3006 /* Determine what the mod_counter is supposed to be for this set. */
3007
3008 mod_counter_found = 0;
3009 mod_counter = 0;
3010 ac = cset->ac;
3011 while(ac!=NULL) {
3012 if (mod_counter_found==0) {
3013 mod_counter = ac->clabel->mod_counter;
3014 mod_counter_found = 1;
3015 } else {
3016 if (ac->clabel->mod_counter > mod_counter) {
3017 mod_counter = ac->clabel->mod_counter;
3018 }
3019 }
3020 ac = ac->next;
3021 }
3022
3023 num_missing = 0;
3024 auto_config = cset->ac;
3025
3026 for(r=0; r<num_rows; r++) {
3027 even_pair_failed = 0;
3028 for(c=0; c<num_cols; c++) {
3029 ac = auto_config;
3030 while(ac!=NULL) {
3031 if ((ac->clabel->row == r) &&
3032 (ac->clabel->column == c) &&
3033 (ac->clabel->mod_counter == mod_counter)) {
3034 /* it's this one... */
3035 #if DEBUG
3036 printf("Found: %s at %d,%d\n",
3037 ac->devname,r,c);
3038 #endif
3039 break;
3040 }
3041 ac=ac->next;
3042 }
3043 if (ac==NULL) {
3044 /* Didn't find one here! */
3045 /* special case for RAID 1, especially
3046 where there are more than 2
3047 components (where RAIDframe treats
3048 things a little differently :( ) */
3049 if (parity_type == '1') {
3050 if (c%2 == 0) { /* even component */
3051 even_pair_failed = 1;
3052 } else { /* odd component. If
3053 we're failed, and
3054 so is the even
3055 component, it's
3056 "Good Night, Charlie" */
3057 if (even_pair_failed == 1) {
3058 return(0);
3059 }
3060 }
3061 } else {
3062 /* normal accounting */
3063 num_missing++;
3064 }
3065 }
3066 if ((parity_type == '1') && (c%2 == 1)) {
3067 				/* Just finished an even/odd pair, and we
3068 				   didn't bail.. reset the even_pair_failed
3069 				   flag, and go on to the next pair.... */
3070 even_pair_failed = 0;
3071 }
3072 }
3073 }
3074
3075 clabel = cset->ac->clabel;
3076
3077 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3078 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3079 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3080 /* XXX this needs to be made *much* more general */
3081 /* Too many failures */
3082 return(0);
3083 }
3084 /* otherwise, all is well, and we've got enough to take a kick
3085 at autoconfiguring this set */
3086 return(1);
3087 }
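/*
 * A concrete reading of the RAID 1 case above: components are checked in
 * even/odd pairs (0,1), (2,3), ...; losing one member of a pair is
 * survivable, but losing both members of the same pair fails the whole
 * set.  For the other levels the simple num_missing count is used instead:
 * RAID 0 tolerates no missing components, RAID 4 and 5 tolerate one.
 */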
3088
3089 void
3090 rf_create_configuration(ac,config,raidPtr)
3091 RF_AutoConfig_t *ac;
3092 RF_Config_t *config;
3093 RF_Raid_t *raidPtr;
3094 {
3095 RF_ComponentLabel_t *clabel;
3096 int i;
3097
3098 clabel = ac->clabel;
3099
3100 /* 1. Fill in the common stuff */
3101 config->numRow = clabel->num_rows;
3102 config->numCol = clabel->num_columns;
3103 config->numSpare = 0; /* XXX should this be set here? */
3104 config->sectPerSU = clabel->sectPerSU;
3105 config->SUsPerPU = clabel->SUsPerPU;
3106 config->SUsPerRU = clabel->SUsPerRU;
3107 config->parityConfig = clabel->parityConfig;
3108 /* XXX... */
3109 strcpy(config->diskQueueType,"fifo");
3110 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3111 config->layoutSpecificSize = 0; /* XXX ?? */
3112
3113 while(ac!=NULL) {
3114 		/* row/col values will be in range due to the checks
3115 		   in rf_reasonable_label() */
3116 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3117 ac->devname);
3118 ac = ac->next;
3119 }
3120
3121 for(i=0;i<RF_MAXDBGV;i++) {
3122 		config->debugVars[i][0] = '\0';
3123 }
3124 }
3125
3126 int
3127 rf_set_autoconfig(raidPtr, new_value)
3128 RF_Raid_t *raidPtr;
3129 int new_value;
3130 {
3131 RF_ComponentLabel_t clabel;
3132 struct vnode *vp;
3133 dev_t dev;
3134 int row, column;
3135
3136 raidPtr->autoconfigure = new_value;
3137 for(row=0; row<raidPtr->numRow; row++) {
3138 for(column=0; column<raidPtr->numCol; column++) {
3139 if (raidPtr->Disks[row][column].status ==
3140 rf_ds_optimal) {
3141 dev = raidPtr->Disks[row][column].dev;
3142 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3143 raidread_component_label(dev, vp, &clabel);
3144 clabel.autoconfigure = new_value;
3145 raidwrite_component_label(dev, vp, &clabel);
3146 }
3147 }
3148 }
3149 return(new_value);
3150 }
3151
3152 int
3153 rf_set_rootpartition(raidPtr, new_value)
3154 RF_Raid_t *raidPtr;
3155 int new_value;
3156 {
3157 RF_ComponentLabel_t clabel;
3158 struct vnode *vp;
3159 dev_t dev;
3160 int row, column;
3161
3162 raidPtr->root_partition = new_value;
3163 for(row=0; row<raidPtr->numRow; row++) {
3164 for(column=0; column<raidPtr->numCol; column++) {
3165 if (raidPtr->Disks[row][column].status ==
3166 rf_ds_optimal) {
3167 dev = raidPtr->Disks[row][column].dev;
3168 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3169 raidread_component_label(dev, vp, &clabel);
3170 clabel.root_partition = new_value;
3171 raidwrite_component_label(dev, vp, &clabel);
3172 }
3173 }
3174 }
3175 return(new_value);
3176 }
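/*
 * rf_set_autoconfig() and rf_set_rootpartition() above are deliberately
 * parallel: each records the new value in the RF_Raid_t, pushes it into
 * the component label of every optimal component (failed or spared
 * components are left untouched), and simply echoes the new value back to
 * the caller.
 */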
3177
3178 void
3179 rf_release_all_vps(cset)
3180 RF_ConfigSet_t *cset;
3181 {
3182 RF_AutoConfig_t *ac;
3183
3184 ac = cset->ac;
3185 while(ac!=NULL) {
3186 /* Close the vp, and give it back */
3187 if (ac->vp) {
3188 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3189 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3190 vput(ac->vp);
3191 ac->vp = NULL;
3192 }
3193 ac = ac->next;
3194 }
3195 }
3196
3197
3198 void
3199 rf_cleanup_config_set(cset)
3200 RF_ConfigSet_t *cset;
3201 {
3202 RF_AutoConfig_t *ac;
3203 RF_AutoConfig_t *next_ac;
3204
3205 ac = cset->ac;
3206 while(ac!=NULL) {
3207 next_ac = ac->next;
3208 /* nuke the label */
3209 free(ac->clabel, M_RAIDFRAME);
3210 /* cleanup the config structure */
3211 free(ac, M_RAIDFRAME);
3212 /* "next.." */
3213 ac = next_ac;
3214 }
3215 /* and, finally, nuke the config set */
3216 free(cset, M_RAIDFRAME);
3217 }
3218
3219
3220 void
3221 raid_init_component_label(raidPtr, clabel)
3222 RF_Raid_t *raidPtr;
3223 RF_ComponentLabel_t *clabel;
3224 {
3225 /* current version number */
3226 clabel->version = RF_COMPONENT_LABEL_VERSION;
3227 clabel->serial_number = raidPtr->serial_number;
3228 clabel->mod_counter = raidPtr->mod_counter;
3229 clabel->num_rows = raidPtr->numRow;
3230 clabel->num_columns = raidPtr->numCol;
3231 clabel->clean = RF_RAID_DIRTY; /* not clean */
3232 clabel->status = rf_ds_optimal; /* "It's good!" */
3233
3234 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3235 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3236 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3237
3238 clabel->blockSize = raidPtr->bytesPerSector;
3239 clabel->numBlocks = raidPtr->sectorsPerDisk;
3240
3241 /* XXX not portable */
3242 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3243 clabel->maxOutstanding = raidPtr->maxOutstanding;
3244 clabel->autoconfigure = raidPtr->autoconfigure;
3245 clabel->root_partition = raidPtr->root_partition;
3246 clabel->last_unit = raidPtr->raidid;
3247 clabel->config_order = raidPtr->config_order;
3248 }
3249
3250 int
3251 rf_auto_config_set(cset,unit)
3252 RF_ConfigSet_t *cset;
3253 int *unit;
3254 {
3255 RF_Raid_t *raidPtr;
3256 RF_Config_t *config;
3257 int raidID;
3258 int retcode;
3259
3260 printf("RAID autoconfigure\n");
3261
3262 retcode = 0;
3263 *unit = -1;
3264
3265 /* 1. Create a config structure */
3266
3267 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3268 M_RAIDFRAME,
3269 M_NOWAIT);
3270 if (config==NULL) {
3271 printf("Out of mem!?!?\n");
3272 /* XXX do something more intelligent here. */
3273 return(1);
3274 }
3275
3276 memset(config, 0, sizeof(RF_Config_t));
3277
3278 /* XXX raidID needs to be set correctly.. */
3279
3280 /*
3281 2. Figure out what RAID ID this one is supposed to live at
3282 See if we can get the same RAID dev that it was configured
3283 on last time..
3284 */
3285
3286 raidID = cset->ac->clabel->last_unit;
3287 if ((raidID < 0) || (raidID >= numraid)) {
3288 /* let's not wander off into lala land. */
3289 raidID = numraid - 1;
3290 }
3291 if (raidPtrs[raidID]->valid != 0) {
3292
3293 /*
3294 Nope... Go looking for an alternative...
3295 Start high so we don't immediately use raid0 if that's
3296 not taken.
3297 */
3298
3299 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
3300 if (raidPtrs[raidID]->valid == 0) {
3301 /* can use this one! */
3302 break;
3303 }
3304 }
3305 }
3306
3307 if (raidID < 0) {
3308 /* punt... */
3309 printf("Unable to auto configure this set!\n");
3310 printf("(Out of RAID devs!)\n");
3311 return(1);
3312 }
3313 printf("Configuring raid%d:\n",raidID);
3314 raidPtr = raidPtrs[raidID];
3315
3316 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3317 raidPtr->raidid = raidID;
3318 raidPtr->openings = RAIDOUTSTANDING;
3319
3320 /* 3. Build the configuration structure */
3321 rf_create_configuration(cset->ac, config, raidPtr);
3322
3323 /* 4. Do the configuration */
3324 retcode = rf_Configure(raidPtr, config, cset->ac);
3325
3326 if (retcode == 0) {
3327
3328 raidinit(raidPtrs[raidID]);
3329
3330 rf_markalldirty(raidPtrs[raidID]);
3331 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3332 if (cset->ac->clabel->root_partition==1) {
3333 /* everything configured just fine. Make a note
3334 that this set is eligible to be root. */
3335 cset->rootable = 1;
3336 /* XXX do this here? */
3337 raidPtrs[raidID]->root_partition = 1;
3338 }
3339 }
3340
3341 /* 5. Cleanup */
3342 free(config, M_RAIDFRAME);
3343
3344 *unit = raidID;
3345 return(retcode);
3346 }
3347
3348 void
3349 rf_disk_unbusy(desc)
3350 RF_RaidAccessDesc_t *desc;
3351 {
3352 struct buf *bp;
3353
3354 bp = (struct buf *)desc->bp;
3355 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3356 (bp->b_bcount - bp->b_resid));
3357 }
3358