1 /*	$NetBSD: rf_netbsdkintf.c,v 1.137 2002/09/23 03:17:36 itojun Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/cdefs.h>
117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.137 2002/09/23 03:17:36 itojun Exp $");
118
119 #include <sys/param.h>
120 #include <sys/errno.h>
121 #include <sys/pool.h>
122 #include <sys/queue.h>
123 #include <sys/disk.h>
124 #include <sys/device.h>
125 #include <sys/stat.h>
126 #include <sys/ioctl.h>
127 #include <sys/fcntl.h>
128 #include <sys/systm.h>
129 #include <sys/namei.h>
130 #include <sys/vnode.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include <dev/raidframe/raidframevar.h>
139 #include <dev/raidframe/raidframeio.h>
140 #include "raid.h"
141 #include "opt_raid_autoconfig.h"
142 #include "rf_raid.h"
143 #include "rf_copyback.h"
144 #include "rf_dag.h"
145 #include "rf_dagflags.h"
146 #include "rf_desc.h"
147 #include "rf_diskqueue.h"
148 #include "rf_etimer.h"
149 #include "rf_general.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_threadstuff.h"
155
156 #ifdef DEBUG
157 int rf_kdebug_level = 0;
158 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
159 #else /* DEBUG */
160 #define db1_printf(a) { }
161 #endif /* DEBUG */
162
163 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
164
165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
166
167 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
168 * spare table */
169 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
170 * installation process */
171
172 /* prototypes */
173 static void KernelWakeupFunc(struct buf * bp);
174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
175 dev_t dev, RF_SectorNum_t startSect,
176 RF_SectorCount_t numSect, caddr_t buf,
177 void (*cbFunc) (struct buf *), void *cbArg,
178 int logBytesPerSector, struct proc * b_proc);
179 static void raidinit(RF_Raid_t *);
180
181 void raidattach(int);
182
183 dev_type_open(raidopen);
184 dev_type_close(raidclose);
185 dev_type_read(raidread);
186 dev_type_write(raidwrite);
187 dev_type_ioctl(raidioctl);
188 dev_type_strategy(raidstrategy);
189 dev_type_dump(raiddump);
190 dev_type_size(raidsize);
191
192 const struct bdevsw raid_bdevsw = {
193 raidopen, raidclose, raidstrategy, raidioctl,
194 raiddump, raidsize, D_DISK
195 };
196
197 const struct cdevsw raid_cdevsw = {
198 raidopen, raidclose, raidread, raidwrite, raidioctl,
199 nostop, notty, nopoll, nommap, D_DISK
200 };
201
202 /*
203 * Pilfered from ccd.c
204 */
205
206 struct raidbuf {
207 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
208 struct buf *rf_obp; /* ptr. to original I/O buf */
209 RF_DiskQueueData_t *req;/* the request that this was part of.. */
210 };
211
212 /* component buffer pool */
213 struct pool raidframe_cbufpool;
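/*
 * A brief cross-reference sketch (nothing new here, just how the pieces
 * above are used later in this file): rf_buf must stay the first member
 * so that the struct buf * handed to KernelWakeupFunc() can be cast
 * straight back to the containing raidbuf, i.e.
 *
 *	struct raidbuf *raidbp = (struct raidbuf *) vbp;
 *
 * raidbufs are taken from raidframe_cbufpool with pool_get(..., PR_NOWAIT)
 * in rf_DispatchKernelIO() and returned with pool_put() in
 * KernelWakeupFunc() once the component I/O completes.
 */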
214
215 /* XXX Not sure if the following should be replacing the raidPtrs above,
216 or if it should be used in conjunction with that...
217 */
218
219 struct raid_softc {
220 int sc_flags; /* flags */
221 int sc_cflags; /* configuration flags */
222 size_t sc_size; /* size of the raid device */
223 char sc_xname[20]; /* XXX external name */
224 struct disk sc_dkdev; /* generic disk device info */
225 struct bufq_state buf_queue; /* used for the device queue */
226 };
227 /* sc_flags */
228 #define RAIDF_INITED 0x01 /* unit has been initialized */
229 #define RAIDF_WLABEL 0x02 /* label area is writable */
230 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
231 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
232 #define RAIDF_LOCKED 0x80 /* unit is locked */
233
234 #define raidunit(x) DISKUNIT(x)
235 int numraid = 0;
236
237 /*
238 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
239 * Be aware that large numbers can allow the driver to consume a lot of
240 * kernel memory, especially on writes, and in degraded mode reads.
241 *
242 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
243 * a single 64K write will typically require 64K for the old data,
244 * 64K for the old parity, and 64K for the new parity, for a total
245 * of 192K (if the parity buffer is not re-used immediately).
246 * Even if it is used immediately, that's still 128K, which when multiplied
247 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
248 *
249 * Now in degraded mode, for example, a 64K read on the above setup may
250 * require data reconstruction, which will require *all* of the 4 remaining
251 * disks to participate -- 4 * 32K/disk == 128K again.
252 */
253
254 #ifndef RAIDOUTSTANDING
255 #define RAIDOUTSTANDING 6
256 #endif
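/*
 * A minimal sketch of overriding the default above (this assumes the
 * define is supplied externally, e.g. as a kernel config option or via
 * CPPFLAGS; the exact option plumbing is not set up in this file):
 *
 *	options 	RAIDOUTSTANDING=10
 */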
257
258 #define RAIDLABELDEV(dev) \
259 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
260
261 /* declared here, and made public, for the benefit of KVM stuff.. */
262 struct raid_softc *raid_softc;
263
264 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
265 struct disklabel *);
266 static void raidgetdisklabel(dev_t);
267 static void raidmakedisklabel(struct raid_softc *);
268
269 static int raidlock(struct raid_softc *);
270 static void raidunlock(struct raid_softc *);
271
272 static void rf_markalldirty(RF_Raid_t *);
273
274 struct device *raidrootdev;
275
276 void rf_ReconThread(struct rf_recon_req *);
277 /* XXX what I want is: */
278 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
279 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
280 void rf_CopybackThread(RF_Raid_t *raidPtr);
281 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
282 void rf_buildroothack(void *);
283
284 RF_AutoConfig_t *rf_find_raid_components(void);
285 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
286 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
287 static int rf_reasonable_label(RF_ComponentLabel_t *);
288 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
289 int rf_set_autoconfig(RF_Raid_t *, int);
290 int rf_set_rootpartition(RF_Raid_t *, int);
291 void rf_release_all_vps(RF_ConfigSet_t *);
292 void rf_cleanup_config_set(RF_ConfigSet_t *);
293 int rf_have_enough_components(RF_ConfigSet_t *);
294 int rf_auto_config_set(RF_ConfigSet_t *, int *);
295
296 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
297 allow autoconfig to take place.
298 Note that this is overridden by having
299 RAID_AUTOCONFIG as an option in the
300 kernel config file. */
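/*
 * A hedged example of enabling autoconfiguration at build time, implied
 * by the opt_raid_autoconfig.h include above and the RAID_AUTOCONFIG
 * test below (the config(8) line itself is an assumption):
 *
 *	options 	RAID_AUTOCONFIG
 */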
301
302 void
303 raidattach(num)
304 int num;
305 {
306 int raidID;
307 int i, rc;
308 RF_AutoConfig_t *ac_list; /* autoconfig list */
309 RF_ConfigSet_t *config_sets;
310
311 #ifdef DEBUG
312 printf("raidattach: Asked for %d units\n", num);
313 #endif
314
315 if (num <= 0) {
316 #ifdef DIAGNOSTIC
317 panic("raidattach: count <= 0");
318 #endif
319 return;
320 }
321 /* This is where all the initialization stuff gets done. */
322
323 numraid = num;
324
325 /* Make some space for requested number of units... */
326
327 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
328 if (raidPtrs == NULL) {
329 panic("raidPtrs is NULL!!\n");
330 }
331
332 /* Initialize the component buffer pool. */
333 pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
334 0, 0, "raidpl", NULL);
335
336 rc = rf_mutex_init(&rf_sparet_wait_mutex);
337 if (rc) {
338 RF_PANIC();
339 }
340
341 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
342
343 for (i = 0; i < num; i++)
344 raidPtrs[i] = NULL;
345 rc = rf_BootRaidframe();
346 if (rc == 0)
347 printf("Kernelized RAIDframe activated\n");
348 else
349 panic("Serious error booting RAID!!\n");
350
351 /* put together some datastructures like the CCD device does.. This
352 * lets us lock the device and what-not when it gets opened. */
353
354 raid_softc = (struct raid_softc *)
355 malloc(num * sizeof(struct raid_softc),
356 M_RAIDFRAME, M_NOWAIT);
357 if (raid_softc == NULL) {
358 printf("WARNING: no memory for RAIDframe driver\n");
359 return;
360 }
361
362 memset(raid_softc, 0, num * sizeof(struct raid_softc));
363
364 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
365 M_RAIDFRAME, M_NOWAIT);
366 if (raidrootdev == NULL) {
367 panic("No memory for RAIDframe driver!!?!?!\n");
368 }
369
370 for (raidID = 0; raidID < num; raidID++) {
371 bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
372
373 raidrootdev[raidID].dv_class = DV_DISK;
374 raidrootdev[raidID].dv_cfdata = NULL;
375 raidrootdev[raidID].dv_unit = raidID;
376 raidrootdev[raidID].dv_parent = NULL;
377 raidrootdev[raidID].dv_flags = 0;
378 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
379
380 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
381 (RF_Raid_t *));
382 if (raidPtrs[raidID] == NULL) {
383 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
384 numraid = raidID;
385 return;
386 }
387 }
388
389 #ifdef RAID_AUTOCONFIG
390 raidautoconfig = 1;
391 #endif
392
393 if (raidautoconfig) {
394 /* 1. locate all RAID components on the system */
395
396 #if DEBUG
397 printf("Searching for raid components...\n");
398 #endif
399 ac_list = rf_find_raid_components();
400
401 /* 2. sort them into their respective sets */
402
403 config_sets = rf_create_auto_sets(ac_list);
404
405 /* 3. evaluate each set and configure the valid ones
406 This gets done in rf_buildroothack() */
407
408 /* schedule the creation of the thread to do the
409 "/ on RAID" stuff */
410
411 kthread_create(rf_buildroothack,config_sets);
412
413 }
414
415 }
416
417 void
418 rf_buildroothack(arg)
419 void *arg;
420 {
421 RF_ConfigSet_t *config_sets = arg;
422 RF_ConfigSet_t *cset;
423 RF_ConfigSet_t *next_cset;
424 int retcode;
425 int raidID;
426 int rootID;
427 int num_root;
428
429 rootID = 0;
430 num_root = 0;
431 cset = config_sets;
432 while(cset != NULL ) {
433 next_cset = cset->next;
434 if (rf_have_enough_components(cset) &&
435 cset->ac->clabel->autoconfigure==1) {
436 retcode = rf_auto_config_set(cset,&raidID);
437 if (!retcode) {
438 if (cset->rootable) {
439 rootID = raidID;
440 num_root++;
441 }
442 } else {
443 /* The autoconfig didn't work :( */
444 #if DEBUG
445 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
446 #endif
447 rf_release_all_vps(cset);
448 }
449 } else {
450 /* we're not autoconfiguring this set...
451 release the associated resources */
452 rf_release_all_vps(cset);
453 }
454 /* cleanup */
455 rf_cleanup_config_set(cset);
456 cset = next_cset;
457 }
458
459 /* we found something bootable... */
460
461 if (num_root == 1) {
462 booted_device = &raidrootdev[rootID];
463 } else if (num_root > 1) {
464 /* we can't guess.. require the user to answer... */
465 boothowto |= RB_ASKNAME;
466 }
467 }
468
469
470 int
471 raidsize(dev)
472 dev_t dev;
473 {
474 struct raid_softc *rs;
475 struct disklabel *lp;
476 int part, unit, omask, size;
477
478 unit = raidunit(dev);
479 if (unit >= numraid)
480 return (-1);
481 rs = &raid_softc[unit];
482
483 if ((rs->sc_flags & RAIDF_INITED) == 0)
484 return (-1);
485
486 part = DISKPART(dev);
487 omask = rs->sc_dkdev.dk_openmask & (1 << part);
488 lp = rs->sc_dkdev.dk_label;
489
490 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
491 return (-1);
492
493 if (lp->d_partitions[part].p_fstype != FS_SWAP)
494 size = -1;
495 else
496 size = lp->d_partitions[part].p_size *
497 (lp->d_secsize / DEV_BSIZE);
498
499 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
500 return (-1);
501
502 return (size);
503
504 }
505
506 int
507 raiddump(dev, blkno, va, size)
508 dev_t dev;
509 daddr_t blkno;
510 caddr_t va;
511 size_t size;
512 {
513 /* Not implemented. */
514 return ENXIO;
515 }
516 /* ARGSUSED */
517 int
518 raidopen(dev, flags, fmt, p)
519 dev_t dev;
520 int flags, fmt;
521 struct proc *p;
522 {
523 int unit = raidunit(dev);
524 struct raid_softc *rs;
525 struct disklabel *lp;
526 int part, pmask;
527 int error = 0;
528
529 if (unit >= numraid)
530 return (ENXIO);
531 rs = &raid_softc[unit];
532
533 if ((error = raidlock(rs)) != 0)
534 return (error);
535 lp = rs->sc_dkdev.dk_label;
536
537 part = DISKPART(dev);
538 pmask = (1 << part);
539
540 db1_printf(("Opening raid device number: %d partition: %d\n",
541 unit, part));
542
543
544 if ((rs->sc_flags & RAIDF_INITED) &&
545 (rs->sc_dkdev.dk_openmask == 0))
546 raidgetdisklabel(dev);
547
548 /* make sure that this partition exists */
549
550 if (part != RAW_PART) {
551 db1_printf(("Not a raw partition..\n"));
552 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
553 ((part >= lp->d_npartitions) ||
554 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
555 error = ENXIO;
556 raidunlock(rs);
557 db1_printf(("Bailing out...\n"));
558 return (error);
559 }
560 }
561 /* Prevent this unit from being unconfigured while open. */
562 switch (fmt) {
563 case S_IFCHR:
564 rs->sc_dkdev.dk_copenmask |= pmask;
565 break;
566
567 case S_IFBLK:
568 rs->sc_dkdev.dk_bopenmask |= pmask;
569 break;
570 }
571
572 if ((rs->sc_dkdev.dk_openmask == 0) &&
573 ((rs->sc_flags & RAIDF_INITED) != 0)) {
574 /* First one... mark things as dirty... Note that we *MUST*
575 have done a configure before this. I DO NOT WANT TO BE
576 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
577 THAT THEY BELONG TOGETHER!!!!! */
578 /* XXX should check to see if we're only open for reading
579 here... If so, we needn't do this, but then need some
580 other way of keeping track of what's happened.. */
581
582 rf_markalldirty( raidPtrs[unit] );
583 }
584
585
586 rs->sc_dkdev.dk_openmask =
587 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
588
589 raidunlock(rs);
590
591 return (error);
592
593
594 }
595 /* ARGSUSED */
596 int
597 raidclose(dev, flags, fmt, p)
598 dev_t dev;
599 int flags, fmt;
600 struct proc *p;
601 {
602 int unit = raidunit(dev);
603 struct raid_softc *rs;
604 int error = 0;
605 int part;
606
607 if (unit >= numraid)
608 return (ENXIO);
609 rs = &raid_softc[unit];
610
611 if ((error = raidlock(rs)) != 0)
612 return (error);
613
614 part = DISKPART(dev);
615
616 /* ...that much closer to allowing unconfiguration... */
617 switch (fmt) {
618 case S_IFCHR:
619 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
620 break;
621
622 case S_IFBLK:
623 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
624 break;
625 }
626 rs->sc_dkdev.dk_openmask =
627 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
628
629 if ((rs->sc_dkdev.dk_openmask == 0) &&
630 ((rs->sc_flags & RAIDF_INITED) != 0)) {
631 /* Last one... device is not unconfigured yet.
632 Mark things as clean. (If RAIDF_INITED were not set,
633 device shutdown would already have taken care of
634 setting the clean bits.) */
635 #if 0
636 printf("Last one on raid%d. Updating status.\n",unit);
637 #endif
638 rf_update_component_labels(raidPtrs[unit],
639 RF_FINAL_COMPONENT_UPDATE);
640 if (doing_shutdown) {
641 /* last one, and we're going down, so
642 lights out for this RAID set too. */
643 error = rf_Shutdown(raidPtrs[unit]);
644
645 /* It's no longer initialized... */
646 rs->sc_flags &= ~RAIDF_INITED;
647
648 /* Detach the disk. */
649 disk_detach(&rs->sc_dkdev);
650 }
651 }
652
653 raidunlock(rs);
654 return (0);
655
656 }
657
658 void
659 raidstrategy(bp)
660 struct buf *bp;
661 {
662 int s;
663
664 unsigned int raidID = raidunit(bp->b_dev);
665 RF_Raid_t *raidPtr;
666 struct raid_softc *rs = &raid_softc[raidID];
667 struct disklabel *lp;
668 int wlabel;
669
670 if ((rs->sc_flags & RAIDF_INITED) ==0) {
671 bp->b_error = ENXIO;
672 bp->b_flags |= B_ERROR;
673 bp->b_resid = bp->b_bcount;
674 biodone(bp);
675 return;
676 }
677 if (raidID >= numraid || !raidPtrs[raidID]) {
678 bp->b_error = ENODEV;
679 bp->b_flags |= B_ERROR;
680 bp->b_resid = bp->b_bcount;
681 biodone(bp);
682 return;
683 }
684 raidPtr = raidPtrs[raidID];
685 if (!raidPtr->valid) {
686 bp->b_error = ENODEV;
687 bp->b_flags |= B_ERROR;
688 bp->b_resid = bp->b_bcount;
689 biodone(bp);
690 return;
691 }
692 if (bp->b_bcount == 0) {
693 db1_printf(("b_bcount is zero..\n"));
694 biodone(bp);
695 return;
696 }
697 lp = rs->sc_dkdev.dk_label;
698
699 /*
700 * Do bounds checking and adjust transfer. If there's an
701 * error, the bounds check will flag that for us.
702 */
703
704 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
705 if (DISKPART(bp->b_dev) != RAW_PART)
706 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
707 db1_printf(("Bounds check failed!!:%d %d\n",
708 (int) bp->b_blkno, (int) wlabel));
709 biodone(bp);
710 return;
711 }
712 s = splbio();
713
714 bp->b_resid = 0;
715
716 /* stuff it onto our queue */
717 BUFQ_PUT(&rs->buf_queue, bp);
718
719 raidstart(raidPtrs[raidID]);
720
721 splx(s);
722 }
723 /* ARGSUSED */
724 int
725 raidread(dev, uio, flags)
726 dev_t dev;
727 struct uio *uio;
728 int flags;
729 {
730 int unit = raidunit(dev);
731 struct raid_softc *rs;
732 int part;
733
734 if (unit >= numraid)
735 return (ENXIO);
736 rs = &raid_softc[unit];
737
738 if ((rs->sc_flags & RAIDF_INITED) == 0)
739 return (ENXIO);
740 part = DISKPART(dev);
741
742 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
743
744 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
745
746 }
747 /* ARGSUSED */
748 int
749 raidwrite(dev, uio, flags)
750 dev_t dev;
751 struct uio *uio;
752 int flags;
753 {
754 int unit = raidunit(dev);
755 struct raid_softc *rs;
756
757 if (unit >= numraid)
758 return (ENXIO);
759 rs = &raid_softc[unit];
760
761 if ((rs->sc_flags & RAIDF_INITED) == 0)
762 return (ENXIO);
763 db1_printf(("raidwrite\n"));
764 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
765
766 }
767
768 int
769 raidioctl(dev, cmd, data, flag, p)
770 dev_t dev;
771 u_long cmd;
772 caddr_t data;
773 int flag;
774 struct proc *p;
775 {
776 int unit = raidunit(dev);
777 int error = 0;
778 int part, pmask;
779 struct raid_softc *rs;
780 RF_Config_t *k_cfg, *u_cfg;
781 RF_Raid_t *raidPtr;
782 RF_RaidDisk_t *diskPtr;
783 RF_AccTotals_t *totals;
784 RF_DeviceConfig_t *d_cfg, **ucfgp;
785 u_char *specific_buf;
786 int retcode = 0;
787 int row;
788 int column;
789 int raidid;
790 struct rf_recon_req *rrcopy, *rr;
791 RF_ComponentLabel_t *clabel;
792 RF_ComponentLabel_t ci_label;
793 RF_ComponentLabel_t **clabel_ptr;
794 RF_SingleComponent_t *sparePtr,*componentPtr;
795 RF_SingleComponent_t hot_spare;
796 RF_SingleComponent_t component;
797 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
798 int i, j, d;
799 #ifdef __HAVE_OLD_DISKLABEL
800 struct disklabel newlabel;
801 #endif
802
803 if (unit >= numraid)
804 return (ENXIO);
805 rs = &raid_softc[unit];
806 raidPtr = raidPtrs[unit];
807
808 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
809 (int) DISKPART(dev), (int) unit, (int) cmd));
810
811 /* Must be open for writes for these commands... */
812 switch (cmd) {
813 case DIOCSDINFO:
814 case DIOCWDINFO:
815 #ifdef __HAVE_OLD_DISKLABEL
816 case ODIOCWDINFO:
817 case ODIOCSDINFO:
818 #endif
819 case DIOCWLABEL:
820 if ((flag & FWRITE) == 0)
821 return (EBADF);
822 }
823
824 /* Must be initialized for these... */
825 switch (cmd) {
826 case DIOCGDINFO:
827 case DIOCSDINFO:
828 case DIOCWDINFO:
829 #ifdef __HAVE_OLD_DISKLABEL
830 case ODIOCGDINFO:
831 case ODIOCWDINFO:
832 case ODIOCSDINFO:
833 case ODIOCGDEFLABEL:
834 #endif
835 case DIOCGPART:
836 case DIOCWLABEL:
837 case DIOCGDEFLABEL:
838 case RAIDFRAME_SHUTDOWN:
839 case RAIDFRAME_REWRITEPARITY:
840 case RAIDFRAME_GET_INFO:
841 case RAIDFRAME_RESET_ACCTOTALS:
842 case RAIDFRAME_GET_ACCTOTALS:
843 case RAIDFRAME_KEEP_ACCTOTALS:
844 case RAIDFRAME_GET_SIZE:
845 case RAIDFRAME_FAIL_DISK:
846 case RAIDFRAME_COPYBACK:
847 case RAIDFRAME_CHECK_RECON_STATUS:
848 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
849 case RAIDFRAME_GET_COMPONENT_LABEL:
850 case RAIDFRAME_SET_COMPONENT_LABEL:
851 case RAIDFRAME_ADD_HOT_SPARE:
852 case RAIDFRAME_REMOVE_HOT_SPARE:
853 case RAIDFRAME_INIT_LABELS:
854 case RAIDFRAME_REBUILD_IN_PLACE:
855 case RAIDFRAME_CHECK_PARITY:
856 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
857 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
858 case RAIDFRAME_CHECK_COPYBACK_STATUS:
859 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
860 case RAIDFRAME_SET_AUTOCONFIG:
861 case RAIDFRAME_SET_ROOT:
862 case RAIDFRAME_DELETE_COMPONENT:
863 case RAIDFRAME_INCORPORATE_HOT_SPARE:
864 if ((rs->sc_flags & RAIDF_INITED) == 0)
865 return (ENXIO);
866 }
867
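	/*
	 * A rough sketch of the calling convention as seen from userland
	 * (raidctl is the usual consumer; the names below are illustrative
	 * only).  Note that RAIDFRAME_CONFIGURE, RAIDFRAME_GET_INFO and
	 * RAIDFRAME_GET_COMPONENT_LABEL hand the kernel a pointer to a
	 * pointer, which is why those handlers copyin()/copyout() through
	 * an extra level of indirection:
	 *
	 *	RF_Config_t cfg, *cfgp = &cfg;
	 *	int fd = open("/dev/rraid0d", O_RDWR);
	 *	ioctl(fd, RAIDFRAME_CONFIGURE, &cfgp);
	 *
	 * (the raw-partition device name varies by architecture)
	 */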
868 switch (cmd) {
869
870 /* configure the system */
871 case RAIDFRAME_CONFIGURE:
872
873 if (raidPtr->valid) {
874 /* There is a valid RAID set running on this unit! */
875 printf("raid%d: Device already configured!\n",unit);
876 return(EINVAL);
877 }
878
879 /* copy-in the configuration information */
880 /* data points to a pointer to the configuration structure */
881
882 u_cfg = *((RF_Config_t **) data);
883 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
884 if (k_cfg == NULL) {
885 return (ENOMEM);
886 }
887 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
888 sizeof(RF_Config_t));
889 if (retcode) {
890 RF_Free(k_cfg, sizeof(RF_Config_t));
891 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
892 retcode));
893 return (retcode);
894 }
895 /* allocate a buffer for the layout-specific data, and copy it
896 * in */
897 if (k_cfg->layoutSpecificSize) {
898 if (k_cfg->layoutSpecificSize > 10000) {
899 /* sanity check */
900 RF_Free(k_cfg, sizeof(RF_Config_t));
901 return (EINVAL);
902 }
903 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
904 (u_char *));
905 if (specific_buf == NULL) {
906 RF_Free(k_cfg, sizeof(RF_Config_t));
907 return (ENOMEM);
908 }
909 retcode = copyin(k_cfg->layoutSpecific,
910 (caddr_t) specific_buf,
911 k_cfg->layoutSpecificSize);
912 if (retcode) {
913 RF_Free(k_cfg, sizeof(RF_Config_t));
914 RF_Free(specific_buf,
915 k_cfg->layoutSpecificSize);
916 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
917 retcode));
918 return (retcode);
919 }
920 } else
921 specific_buf = NULL;
922 k_cfg->layoutSpecific = specific_buf;
923
924 /* should do some kind of sanity check on the configuration.
925 * Store the sum of all the bytes in the last byte? */
926
927 /* configure the system */
928
929 /*
930 * Clear the entire RAID descriptor, just to make sure
931 * there is no stale data left in the case of a
932 * reconfiguration
933 */
934 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
935 raidPtr->raidid = unit;
936
937 retcode = rf_Configure(raidPtr, k_cfg, NULL);
938
939 if (retcode == 0) {
940
941 /* allow this many simultaneous IO's to
942 this RAID device */
943 raidPtr->openings = RAIDOUTSTANDING;
944
945 raidinit(raidPtr);
946 rf_markalldirty(raidPtr);
947 }
948 /* free the buffers. No return code here. */
949 if (k_cfg->layoutSpecificSize) {
950 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
951 }
952 RF_Free(k_cfg, sizeof(RF_Config_t));
953
954 return (retcode);
955
956 /* shutdown the system */
957 case RAIDFRAME_SHUTDOWN:
958
959 if ((error = raidlock(rs)) != 0)
960 return (error);
961
962 /*
963 * If somebody has a partition mounted, we shouldn't
964 * shutdown.
965 */
966
967 part = DISKPART(dev);
968 pmask = (1 << part);
969 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
970 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
971 (rs->sc_dkdev.dk_copenmask & pmask))) {
972 raidunlock(rs);
973 return (EBUSY);
974 }
975
976 retcode = rf_Shutdown(raidPtr);
977
978 /* It's no longer initialized... */
979 rs->sc_flags &= ~RAIDF_INITED;
980
981 /* Detach the disk. */
982 disk_detach(&rs->sc_dkdev);
983
984 raidunlock(rs);
985
986 return (retcode);
987 case RAIDFRAME_GET_COMPONENT_LABEL:
988 clabel_ptr = (RF_ComponentLabel_t **) data;
989 /* need to read the component label for the disk indicated
990 by row,column in clabel */
991
992 /* For practice, let's get it directly from disk, rather
993 than from the in-core copy */
994 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
995 (RF_ComponentLabel_t *));
996 if (clabel == NULL)
997 return (ENOMEM);
998
999 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1000
1001 retcode = copyin( *clabel_ptr, clabel,
1002 sizeof(RF_ComponentLabel_t));
1003
1004 if (retcode) {
1005 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1006 return(retcode);
1007 }
1008
1009 row = clabel->row;
1010 column = clabel->column;
1011
1012 if ((row < 0) || (row >= raidPtr->numRow) ||
1013 (column < 0) || (column >= raidPtr->numCol +
1014 raidPtr->numSpare)) {
1015 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1016 return(EINVAL);
1017 }
1018
1019 raidread_component_label(raidPtr->Disks[row][column].dev,
1020 raidPtr->raid_cinfo[row][column].ci_vp,
1021 clabel );
1022
1023 retcode = copyout((caddr_t) clabel,
1024 (caddr_t) *clabel_ptr,
1025 sizeof(RF_ComponentLabel_t));
1026 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1027 return (retcode);
1028
1029 case RAIDFRAME_SET_COMPONENT_LABEL:
1030 clabel = (RF_ComponentLabel_t *) data;
1031
1032 /* XXX check the label for valid stuff... */
1033 /* Note that some things *should not* get modified --
1034 the user should be re-initing the labels instead of
1035 trying to patch things.
1036 */
1037
1038 raidid = raidPtr->raidid;
1039 printf("raid%d: Got component label:\n", raidid);
1040 printf("raid%d: Version: %d\n", raidid, clabel->version);
1041 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1042 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1043 printf("raid%d: Row: %d\n", raidid, clabel->row);
1044 printf("raid%d: Column: %d\n", raidid, clabel->column);
1045 printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
1046 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1047 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1048 printf("raid%d: Status: %d\n", raidid, clabel->status);
1049
1050 row = clabel->row;
1051 column = clabel->column;
1052
1053 if ((row < 0) || (row >= raidPtr->numRow) ||
1054 (column < 0) || (column >= raidPtr->numCol)) {
1055 return(EINVAL);
1056 }
1057
1058 /* XXX this isn't allowed to do anything for now :-) */
1059
1060 /* XXX and before it is, we need to fill in the rest
1061 of the fields!?!?!?! */
1062 #if 0
1063 raidwrite_component_label(
1064 raidPtr->Disks[row][column].dev,
1065 raidPtr->raid_cinfo[row][column].ci_vp,
1066 clabel );
1067 #endif
1068 return (0);
1069
1070 case RAIDFRAME_INIT_LABELS:
1071 clabel = (RF_ComponentLabel_t *) data;
1072 /*
1073 we only want the serial number from
1074 the above. We get all the rest of the information
1075 from the config that was used to create this RAID
1076 set.
1077 */
1078
1079 raidPtr->serial_number = clabel->serial_number;
1080
1081 raid_init_component_label(raidPtr, &ci_label);
1082 ci_label.serial_number = clabel->serial_number;
1083
1084 for(row=0;row<raidPtr->numRow;row++) {
1085 ci_label.row = row;
1086 for(column=0;column<raidPtr->numCol;column++) {
1087 diskPtr = &raidPtr->Disks[row][column];
1088 if (!RF_DEAD_DISK(diskPtr->status)) {
1089 ci_label.partitionSize = diskPtr->partitionSize;
1090 ci_label.column = column;
1091 raidwrite_component_label(
1092 raidPtr->Disks[row][column].dev,
1093 raidPtr->raid_cinfo[row][column].ci_vp,
1094 &ci_label );
1095 }
1096 }
1097 }
1098
1099 return (retcode);
1100 case RAIDFRAME_SET_AUTOCONFIG:
1101 d = rf_set_autoconfig(raidPtr, *(int *) data);
1102 printf("raid%d: New autoconfig value is: %d\n",
1103 raidPtr->raidid, d);
1104 *(int *) data = d;
1105 return (retcode);
1106
1107 case RAIDFRAME_SET_ROOT:
1108 d = rf_set_rootpartition(raidPtr, *(int *) data);
1109 printf("raid%d: New rootpartition value is: %d\n",
1110 raidPtr->raidid, d);
1111 *(int *) data = d;
1112 return (retcode);
1113
1114 /* initialize all parity */
1115 case RAIDFRAME_REWRITEPARITY:
1116
1117 if (raidPtr->Layout.map->faultsTolerated == 0) {
1118 /* Parity for RAID 0 is trivially correct */
1119 raidPtr->parity_good = RF_RAID_CLEAN;
1120 return(0);
1121 }
1122
1123 if (raidPtr->parity_rewrite_in_progress == 1) {
1124 /* Re-write is already in progress! */
1125 return(EINVAL);
1126 }
1127
1128 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1129 rf_RewriteParityThread,
1130 raidPtr,"raid_parity");
1131 return (retcode);
1132
1133
1134 case RAIDFRAME_ADD_HOT_SPARE:
1135 sparePtr = (RF_SingleComponent_t *) data;
1136 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1137 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1138 return(retcode);
1139
1140 case RAIDFRAME_REMOVE_HOT_SPARE:
1141 return(retcode);
1142
1143 case RAIDFRAME_DELETE_COMPONENT:
1144 componentPtr = (RF_SingleComponent_t *)data;
1145 memcpy( &component, componentPtr,
1146 sizeof(RF_SingleComponent_t));
1147 retcode = rf_delete_component(raidPtr, &component);
1148 return(retcode);
1149
1150 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1151 componentPtr = (RF_SingleComponent_t *)data;
1152 memcpy( &component, componentPtr,
1153 sizeof(RF_SingleComponent_t));
1154 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1155 return(retcode);
1156
1157 case RAIDFRAME_REBUILD_IN_PLACE:
1158
1159 if (raidPtr->Layout.map->faultsTolerated == 0) {
1160 /* Can't do this on a RAID 0!! */
1161 return(EINVAL);
1162 }
1163
1164 if (raidPtr->recon_in_progress == 1) {
1165 /* a reconstruct is already in progress! */
1166 return(EINVAL);
1167 }
1168
1169 componentPtr = (RF_SingleComponent_t *) data;
1170 memcpy( &component, componentPtr,
1171 sizeof(RF_SingleComponent_t));
1172 row = component.row;
1173 column = component.column;
1174 printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
1175 row, column);
1176 if ((row < 0) || (row >= raidPtr->numRow) ||
1177 (column < 0) || (column >= raidPtr->numCol)) {
1178 return(EINVAL);
1179 }
1180
1181 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1182 if (rrcopy == NULL)
1183 return(ENOMEM);
1184
1185 rrcopy->raidPtr = (void *) raidPtr;
1186 rrcopy->row = row;
1187 rrcopy->col = column;
1188
1189 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1190 rf_ReconstructInPlaceThread,
1191 rrcopy,"raid_reconip");
1192 return(retcode);
1193
1194 case RAIDFRAME_GET_INFO:
1195 if (!raidPtr->valid)
1196 return (ENODEV);
1197 ucfgp = (RF_DeviceConfig_t **) data;
1198 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1199 (RF_DeviceConfig_t *));
1200 if (d_cfg == NULL)
1201 return (ENOMEM);
1202 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1203 d_cfg->rows = raidPtr->numRow;
1204 d_cfg->cols = raidPtr->numCol;
1205 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1206 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1207 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1208 return (ENOMEM);
1209 }
1210 d_cfg->nspares = raidPtr->numSpare;
1211 if (d_cfg->nspares >= RF_MAX_DISKS) {
1212 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1213 return (ENOMEM);
1214 }
1215 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1216 d = 0;
1217 for (i = 0; i < d_cfg->rows; i++) {
1218 for (j = 0; j < d_cfg->cols; j++) {
1219 d_cfg->devs[d] = raidPtr->Disks[i][j];
1220 d++;
1221 }
1222 }
1223 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1224 d_cfg->spares[i] = raidPtr->Disks[0][j];
1225 }
1226 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1227 sizeof(RF_DeviceConfig_t));
1228 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1229
1230 return (retcode);
1231
1232 case RAIDFRAME_CHECK_PARITY:
1233 *(int *) data = raidPtr->parity_good;
1234 return (0);
1235
1236 case RAIDFRAME_RESET_ACCTOTALS:
1237 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1238 return (0);
1239
1240 case RAIDFRAME_GET_ACCTOTALS:
1241 totals = (RF_AccTotals_t *) data;
1242 *totals = raidPtr->acc_totals;
1243 return (0);
1244
1245 case RAIDFRAME_KEEP_ACCTOTALS:
1246 raidPtr->keep_acc_totals = *(int *)data;
1247 return (0);
1248
1249 case RAIDFRAME_GET_SIZE:
1250 *(int *) data = raidPtr->totalSectors;
1251 return (0);
1252
1253 /* fail a disk & optionally start reconstruction */
1254 case RAIDFRAME_FAIL_DISK:
1255
1256 if (raidPtr->Layout.map->faultsTolerated == 0) {
1257 /* Can't do this on a RAID 0!! */
1258 return(EINVAL);
1259 }
1260
1261 rr = (struct rf_recon_req *) data;
1262
1263 if (rr->row < 0 || rr->row >= raidPtr->numRow
1264 || rr->col < 0 || rr->col >= raidPtr->numCol)
1265 return (EINVAL);
1266
1267 printf("raid%d: Failing the disk: row: %d col: %d\n",
1268 unit, rr->row, rr->col);
1269
1270 /* make a copy of the recon request so that we don't rely on
1271 * the user's buffer */
1272 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1273 if (rrcopy == NULL)
1274 return(ENOMEM);
1275 memcpy(rrcopy, rr, sizeof(*rr));
1276 rrcopy->raidPtr = (void *) raidPtr;
1277
1278 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1279 rf_ReconThread,
1280 rrcopy,"raid_recon");
1281 return (0);
1282
1283 /* invoke a copyback operation after recon on whatever disk
1284 * needs it, if any */
1285 case RAIDFRAME_COPYBACK:
1286
1287 if (raidPtr->Layout.map->faultsTolerated == 0) {
1288 /* This makes no sense on a RAID 0!! */
1289 return(EINVAL);
1290 }
1291
1292 if (raidPtr->copyback_in_progress == 1) {
1293 /* Copyback is already in progress! */
1294 return(EINVAL);
1295 }
1296
1297 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1298 rf_CopybackThread,
1299 raidPtr,"raid_copyback");
1300 return (retcode);
1301
1302 /* return the percentage completion of reconstruction */
1303 case RAIDFRAME_CHECK_RECON_STATUS:
1304 if (raidPtr->Layout.map->faultsTolerated == 0) {
1305 /* This makes no sense on a RAID 0, so tell the
1306 user it's done. */
1307 *(int *) data = 100;
1308 return(0);
1309 }
1310 row = 0; /* XXX we only consider a single row... */
1311 if (raidPtr->status[row] != rf_rs_reconstructing)
1312 *(int *) data = 100;
1313 else
1314 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1315 return (0);
1316 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1317 progressInfoPtr = (RF_ProgressInfo_t **) data;
1318 row = 0; /* XXX we only consider a single row... */
1319 if (raidPtr->status[row] != rf_rs_reconstructing) {
1320 progressInfo.remaining = 0;
1321 progressInfo.completed = 100;
1322 progressInfo.total = 100;
1323 } else {
1324 progressInfo.total =
1325 raidPtr->reconControl[row]->numRUsTotal;
1326 progressInfo.completed =
1327 raidPtr->reconControl[row]->numRUsComplete;
1328 progressInfo.remaining = progressInfo.total -
1329 progressInfo.completed;
1330 }
1331 retcode = copyout((caddr_t) &progressInfo,
1332 (caddr_t) *progressInfoPtr,
1333 sizeof(RF_ProgressInfo_t));
1334 return (retcode);
1335
1336 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1337 if (raidPtr->Layout.map->faultsTolerated == 0) {
1338 /* This makes no sense on a RAID 0, so tell the
1339 user it's done. */
1340 *(int *) data = 100;
1341 return(0);
1342 }
1343 if (raidPtr->parity_rewrite_in_progress == 1) {
1344 *(int *) data = 100 *
1345 raidPtr->parity_rewrite_stripes_done /
1346 raidPtr->Layout.numStripe;
1347 } else {
1348 *(int *) data = 100;
1349 }
1350 return (0);
1351
1352 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1353 progressInfoPtr = (RF_ProgressInfo_t **) data;
1354 if (raidPtr->parity_rewrite_in_progress == 1) {
1355 progressInfo.total = raidPtr->Layout.numStripe;
1356 progressInfo.completed =
1357 raidPtr->parity_rewrite_stripes_done;
1358 progressInfo.remaining = progressInfo.total -
1359 progressInfo.completed;
1360 } else {
1361 progressInfo.remaining = 0;
1362 progressInfo.completed = 100;
1363 progressInfo.total = 100;
1364 }
1365 retcode = copyout((caddr_t) &progressInfo,
1366 (caddr_t) *progressInfoPtr,
1367 sizeof(RF_ProgressInfo_t));
1368 return (retcode);
1369
1370 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1371 if (raidPtr->Layout.map->faultsTolerated == 0) {
1372 /* This makes no sense on a RAID 0 */
1373 *(int *) data = 100;
1374 return(0);
1375 }
1376 if (raidPtr->copyback_in_progress == 1) {
1377 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1378 raidPtr->Layout.numStripe;
1379 } else {
1380 *(int *) data = 100;
1381 }
1382 return (0);
1383
1384 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1385 progressInfoPtr = (RF_ProgressInfo_t **) data;
1386 if (raidPtr->copyback_in_progress == 1) {
1387 progressInfo.total = raidPtr->Layout.numStripe;
1388 progressInfo.completed =
1389 raidPtr->copyback_stripes_done;
1390 progressInfo.remaining = progressInfo.total -
1391 progressInfo.completed;
1392 } else {
1393 progressInfo.remaining = 0;
1394 progressInfo.completed = 100;
1395 progressInfo.total = 100;
1396 }
1397 retcode = copyout((caddr_t) &progressInfo,
1398 (caddr_t) *progressInfoPtr,
1399 sizeof(RF_ProgressInfo_t));
1400 return (retcode);
1401
1402 /* the sparetable daemon calls this to wait for the kernel to
1403 * need a spare table. this ioctl does not return until a
1404 * spare table is needed. XXX -- calling mpsleep here in the
1405 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1406 * -- I should either compute the spare table in the kernel,
1407 * or have a different -- XXX XXX -- interface (a different
1408 * character device) for delivering the table -- XXX */
1409 #if 0
1410 case RAIDFRAME_SPARET_WAIT:
1411 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1412 while (!rf_sparet_wait_queue)
1413 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1414 waitreq = rf_sparet_wait_queue;
1415 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1416 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1417
1418 /* structure assignment */
1419 *((RF_SparetWait_t *) data) = *waitreq;
1420
1421 RF_Free(waitreq, sizeof(*waitreq));
1422 return (0);
1423
1424 /* wakes up a process waiting on SPARET_WAIT and puts an error
1425 * code in it that will cause the daemon to exit */
1426 case RAIDFRAME_ABORT_SPARET_WAIT:
1427 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1428 waitreq->fcol = -1;
1429 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1430 waitreq->next = rf_sparet_wait_queue;
1431 rf_sparet_wait_queue = waitreq;
1432 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1433 wakeup(&rf_sparet_wait_queue);
1434 return (0);
1435
1436 /* used by the spare table daemon to deliver a spare table
1437 * into the kernel */
1438 case RAIDFRAME_SEND_SPARET:
1439
1440 /* install the spare table */
1441 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1442
1443 /* respond to the requestor. the return status of the spare
1444 * table installation is passed in the "fcol" field */
1445 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1446 waitreq->fcol = retcode;
1447 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1448 waitreq->next = rf_sparet_resp_queue;
1449 rf_sparet_resp_queue = waitreq;
1450 wakeup(&rf_sparet_resp_queue);
1451 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1452
1453 return (retcode);
1454 #endif
1455
1456 default:
1457 break; /* fall through to the os-specific code below */
1458
1459 }
1460
1461 if (!raidPtr->valid)
1462 return (EINVAL);
1463
1464 /*
1465 * Add support for "regular" device ioctls here.
1466 */
1467
1468 switch (cmd) {
1469 case DIOCGDINFO:
1470 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1471 break;
1472 #ifdef __HAVE_OLD_DISKLABEL
1473 case ODIOCGDINFO:
1474 newlabel = *(rs->sc_dkdev.dk_label);
1475 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1476 return ENOTTY;
1477 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1478 break;
1479 #endif
1480
1481 case DIOCGPART:
1482 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1483 ((struct partinfo *) data)->part =
1484 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1485 break;
1486
1487 case DIOCWDINFO:
1488 case DIOCSDINFO:
1489 #ifdef __HAVE_OLD_DISKLABEL
1490 case ODIOCWDINFO:
1491 case ODIOCSDINFO:
1492 #endif
1493 {
1494 struct disklabel *lp;
1495 #ifdef __HAVE_OLD_DISKLABEL
1496 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1497 memset(&newlabel, 0, sizeof newlabel);
1498 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1499 lp = &newlabel;
1500 } else
1501 #endif
1502 lp = (struct disklabel *)data;
1503
1504 if ((error = raidlock(rs)) != 0)
1505 return (error);
1506
1507 rs->sc_flags |= RAIDF_LABELLING;
1508
1509 error = setdisklabel(rs->sc_dkdev.dk_label,
1510 lp, 0, rs->sc_dkdev.dk_cpulabel);
1511 if (error == 0) {
1512 if (cmd == DIOCWDINFO
1513 #ifdef __HAVE_OLD_DISKLABEL
1514 || cmd == ODIOCWDINFO
1515 #endif
1516 )
1517 error = writedisklabel(RAIDLABELDEV(dev),
1518 raidstrategy, rs->sc_dkdev.dk_label,
1519 rs->sc_dkdev.dk_cpulabel);
1520 }
1521 rs->sc_flags &= ~RAIDF_LABELLING;
1522
1523 raidunlock(rs);
1524
1525 if (error)
1526 return (error);
1527 break;
1528 }
1529
1530 case DIOCWLABEL:
1531 if (*(int *) data != 0)
1532 rs->sc_flags |= RAIDF_WLABEL;
1533 else
1534 rs->sc_flags &= ~RAIDF_WLABEL;
1535 break;
1536
1537 case DIOCGDEFLABEL:
1538 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1539 break;
1540
1541 #ifdef __HAVE_OLD_DISKLABEL
1542 case ODIOCGDEFLABEL:
1543 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1544 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1545 return ENOTTY;
1546 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1547 break;
1548 #endif
1549
1550 default:
1551 retcode = ENOTTY;
1552 }
1553 return (retcode);
1554
1555 }
1556
1557
1558 /* raidinit -- complete the rest of the initialization for the
1559 RAIDframe device. */
1560
1561
1562 static void
1563 raidinit(raidPtr)
1564 RF_Raid_t *raidPtr;
1565 {
1566 struct raid_softc *rs;
1567 int unit;
1568
1569 unit = raidPtr->raidid;
1570
1571 rs = &raid_softc[unit];
1572
1573 /* XXX should check return code first... */
1574 rs->sc_flags |= RAIDF_INITED;
1575
1576 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1577
1578 rs->sc_dkdev.dk_name = rs->sc_xname;
1579
1580 /* disk_attach actually creates space for the CPU disklabel, among
1581 * other things, so it's critical to call this *BEFORE* we try putzing
1582 * with disklabels. */
1583
1584 disk_attach(&rs->sc_dkdev);
1585
1586 /* XXX There may be a weird interaction here between this, and
1587 * protectedSectors, as used in RAIDframe. */
1588
1589 rs->sc_size = raidPtr->totalSectors;
1590
1591 }
1592
1593 /* wake up the daemon & tell it to get us a spare table
1594 * XXX
1595 * the entries in the queues should be tagged with the raidPtr
1596 * so that in the extremely rare case that two recons happen at once,
1597 * we know for which device we're requesting a spare table
1598 * XXX
1599 *
1600 * XXX This code is not currently used. GO
1601 */
1602 int
1603 rf_GetSpareTableFromDaemon(req)
1604 RF_SparetWait_t *req;
1605 {
1606 int retcode;
1607
1608 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1609 req->next = rf_sparet_wait_queue;
1610 rf_sparet_wait_queue = req;
1611 wakeup(&rf_sparet_wait_queue);
1612
1613 /* mpsleep unlocks the mutex */
1614 while (!rf_sparet_resp_queue) {
1615 tsleep(&rf_sparet_resp_queue, PRIBIO,
1616 "raidframe getsparetable", 0);
1617 }
1618 req = rf_sparet_resp_queue;
1619 rf_sparet_resp_queue = req->next;
1620 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1621
1622 retcode = req->fcol;
1623 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1624 * alloc'd */
1625 return (retcode);
1626 }
1627
1628 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1629 * bp & passes it down.
1630 * any calls originating in the kernel must use non-blocking I/O
1631 * do some extra sanity checking to return "appropriate" error values for
1632 * certain conditions (to make some standard utilities work)
1633 *
1634 * Formerly known as: rf_DoAccessKernel
1635 */
1636 void
1637 raidstart(raidPtr)
1638 RF_Raid_t *raidPtr;
1639 {
1640 RF_SectorCount_t num_blocks, pb, sum;
1641 RF_RaidAddr_t raid_addr;
1642 int retcode;
1643 struct partition *pp;
1644 daddr_t blocknum;
1645 int unit;
1646 struct raid_softc *rs;
1647 int do_async;
1648 struct buf *bp;
1649
1650 unit = raidPtr->raidid;
1651 rs = &raid_softc[unit];
1652
1653 /* quick check to see if anything has died recently */
1654 RF_LOCK_MUTEX(raidPtr->mutex);
1655 if (raidPtr->numNewFailures > 0) {
1656 rf_update_component_labels(raidPtr,
1657 RF_NORMAL_COMPONENT_UPDATE);
1658 raidPtr->numNewFailures--;
1659 }
1660
1661 /* Check to see if we're at the limit... */
1662 while (raidPtr->openings > 0) {
1663 RF_UNLOCK_MUTEX(raidPtr->mutex);
1664
1665 /* get the next item, if any, from the queue */
1666 if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
1667 /* nothing more to do */
1668 return;
1669 }
1670
1671 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1672 * partition.. Need to make it absolute to the underlying
1673 * device.. */
1674
1675 blocknum = bp->b_blkno;
1676 if (DISKPART(bp->b_dev) != RAW_PART) {
1677 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1678 blocknum += pp->p_offset;
1679 }
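		/*
		 * For example (illustrative numbers only): an I/O at
		 * b_blkno 32 of a partition whose p_offset is 1024 is
		 * issued to the RAID set at absolute block 1056, while
		 * bp->b_blkno itself is left untouched, as the comment
		 * below insists.
		 */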
1680
1681 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1682 (int) blocknum));
1683
1684 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1685 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1686
1687 /* *THIS* is where we adjust what block we're going to...
1688 * but DO NOT TOUCH bp->b_blkno!!! */
1689 raid_addr = blocknum;
1690
1691 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1692 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1693 sum = raid_addr + num_blocks + pb;
1694 if (1 || rf_debugKernelAccess) {
1695 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1696 (int) raid_addr, (int) sum, (int) num_blocks,
1697 (int) pb, (int) bp->b_resid));
1698 }
1699 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1700 || (sum < num_blocks) || (sum < pb)) {
1701 bp->b_error = ENOSPC;
1702 bp->b_flags |= B_ERROR;
1703 bp->b_resid = bp->b_bcount;
1704 biodone(bp);
1705 RF_LOCK_MUTEX(raidPtr->mutex);
1706 continue;
1707 }
1708 /*
1709 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1710 */
1711
1712 if (bp->b_bcount & raidPtr->sectorMask) {
1713 bp->b_error = EINVAL;
1714 bp->b_flags |= B_ERROR;
1715 bp->b_resid = bp->b_bcount;
1716 biodone(bp);
1717 RF_LOCK_MUTEX(raidPtr->mutex);
1718 continue;
1719
1720 }
1721 db1_printf(("Calling DoAccess..\n"));
1722
1723
1724 RF_LOCK_MUTEX(raidPtr->mutex);
1725 raidPtr->openings--;
1726 RF_UNLOCK_MUTEX(raidPtr->mutex);
1727
1728 /*
1729 * Everything is async.
1730 */
1731 do_async = 1;
1732
1733 disk_busy(&rs->sc_dkdev);
1734
1735 /* XXX we're still at splbio() here... do we *really*
1736 need to be? */
1737
1738 /* don't ever condition on bp->b_flags & B_WRITE.
1739 * always condition on B_READ instead */
1740
1741 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1742 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1743 do_async, raid_addr, num_blocks,
1744 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1745
1746 RF_LOCK_MUTEX(raidPtr->mutex);
1747 }
1748 RF_UNLOCK_MUTEX(raidPtr->mutex);
1749 }
1750
1751
1752
1753
1754 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1755
1756 int
1757 rf_DispatchKernelIO(queue, req)
1758 RF_DiskQueue_t *queue;
1759 RF_DiskQueueData_t *req;
1760 {
1761 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1762 struct buf *bp;
1763 struct raidbuf *raidbp = NULL;
1764
1765 req->queue = queue;
1766
1767 #if DIAGNOSTIC
1768 if (queue->raidPtr->raidid >= numraid) {
1769 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
1770 numraid);
1771 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1772 }
1773 #endif
1774
1775 bp = req->bp;
1776 #if 1
1777 /* XXX when there is a physical disk failure, someone is passing us a
1778 * buffer that contains old stuff!! Attempt to deal with this problem
1779 * without taking a performance hit... (not sure where the real bug
1780 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1781
1782 if (bp->b_flags & B_ERROR) {
1783 bp->b_flags &= ~B_ERROR;
1784 }
1785 if (bp->b_error != 0) {
1786 bp->b_error = 0;
1787 }
1788 #endif
1789 raidbp = pool_get(&raidframe_cbufpool, PR_NOWAIT);
1790
1791 /*
1792 * context for raidiodone
1793 */
1794 raidbp->rf_obp = bp;
1795 raidbp->req = req;
1796
1797 LIST_INIT(&raidbp->rf_buf.b_dep);
1798
1799 switch (req->type) {
1800 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1801 /* XXX need to do something extra here.. */
1802 /* I'm leaving this in, as I've never actually seen it used,
1803 * and I'd like folks to report it... GO */
1804 printf("WAKEUP CALLED\n");
1805 queue->numOutstanding++;
1806
1807 /* XXX need to glue the original buffer into this?? */
1808
1809 KernelWakeupFunc(&raidbp->rf_buf);
1810 break;
1811
1812 case RF_IO_TYPE_READ:
1813 case RF_IO_TYPE_WRITE:
1814
1815 if (req->tracerec) {
1816 RF_ETIMER_START(req->tracerec->timer);
1817 }
1818 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1819 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1820 req->sectorOffset, req->numSector,
1821 req->buf, KernelWakeupFunc, (void *) req,
1822 queue->raidPtr->logBytesPerSector, req->b_proc);
1823
1824 if (rf_debugKernelAccess) {
1825 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1826 (long) bp->b_blkno));
1827 }
1828 queue->numOutstanding++;
1829 queue->last_deq_sector = req->sectorOffset;
1830 /* acc wouldn't have been let in if there were any pending
1831 * reqs at any other priority */
1832 queue->curPriority = req->priority;
1833
1834 db1_printf(("Going for %c to unit %d row %d col %d\n",
1835 req->type, queue->raidPtr->raidid,
1836 queue->row, queue->col));
1837 db1_printf(("sector %d count %d (%d bytes) %d\n",
1838 (int) req->sectorOffset, (int) req->numSector,
1839 (int) (req->numSector <<
1840 queue->raidPtr->logBytesPerSector),
1841 (int) queue->raidPtr->logBytesPerSector));
1842 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1843 raidbp->rf_buf.b_vp->v_numoutput++;
1844 }
1845 VOP_STRATEGY(&raidbp->rf_buf);
1846
1847 break;
1848
1849 default:
1850 panic("bad req->type in rf_DispatchKernelIO");
1851 }
1852 db1_printf(("Exiting from DispatchKernelIO\n"));
1853
1854 return (0);
1855 }
1856 /* this is the callback function associated with an I/O invoked from
1857 kernel code.
1858 */
1859 static void
1860 KernelWakeupFunc(vbp)
1861 struct buf *vbp;
1862 {
1863 RF_DiskQueueData_t *req = NULL;
1864 RF_DiskQueue_t *queue;
1865 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1866 struct buf *bp;
1867 int s;
1868
1869 s = splbio();
1870 db1_printf(("recovering the request queue:\n"));
1871 req = raidbp->req;
1872
1873 bp = raidbp->rf_obp;
1874
1875 queue = (RF_DiskQueue_t *) req->queue;
1876
1877 if (raidbp->rf_buf.b_flags & B_ERROR) {
1878 bp->b_flags |= B_ERROR;
1879 bp->b_error = raidbp->rf_buf.b_error ?
1880 raidbp->rf_buf.b_error : EIO;
1881 }
1882
1883 /* XXX methinks this could be wrong... */
1884 #if 1
1885 bp->b_resid = raidbp->rf_buf.b_resid;
1886 #endif
1887
1888 if (req->tracerec) {
1889 RF_ETIMER_STOP(req->tracerec->timer);
1890 RF_ETIMER_EVAL(req->tracerec->timer);
1891 RF_LOCK_MUTEX(rf_tracing_mutex);
1892 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1893 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1894 req->tracerec->num_phys_ios++;
1895 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1896 }
1897 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1898
1899 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1900 * ballistic, and mark the component as hosed... */
1901
1902 if (bp->b_flags & B_ERROR) {
1903 /* Mark the disk as dead */
1904 /* but only mark it once... */
1905 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1906 rf_ds_optimal) {
1907 printf("raid%d: IO Error. Marking %s as failed.\n",
1908 queue->raidPtr->raidid,
1909 queue->raidPtr->Disks[queue->row][queue->col].devname);
1910 queue->raidPtr->Disks[queue->row][queue->col].status =
1911 rf_ds_failed;
1912 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1913 queue->raidPtr->numFailures++;
1914 queue->raidPtr->numNewFailures++;
1915 } else { /* Disk is already dead... */
1916 /* printf("Disk already marked as dead!\n"); */
1917 }
1918
1919 }
1920
1921 pool_put(&raidframe_cbufpool, raidbp);
1922
1923 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1924 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1925
1926 splx(s);
1927 }
1928
1929
1930
1931 /*
1932 * initialize a buf structure for doing an I/O in the kernel.
1933 */
1934 static void
1935 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1936 logBytesPerSector, b_proc)
1937 struct buf *bp;
1938 struct vnode *b_vp;
1939 unsigned rw_flag;
1940 dev_t dev;
1941 RF_SectorNum_t startSect;
1942 RF_SectorCount_t numSect;
1943 caddr_t buf;
1944 void (*cbFunc) (struct buf *);
1945 void *cbArg;
1946 int logBytesPerSector;
1947 struct proc *b_proc;
1948 {
1949 /* bp->b_flags = B_PHYS | rw_flag; */
1950 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1951 bp->b_bcount = numSect << logBytesPerSector;
1952 bp->b_bufsize = bp->b_bcount;
1953 bp->b_error = 0;
1954 bp->b_dev = dev;
1955 bp->b_data = buf;
1956 bp->b_blkno = startSect;
1957 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1958 if (bp->b_bcount == 0) {
1959 panic("bp->b_bcount is zero in InitBP!!\n");
1960 }
1961 bp->b_proc = b_proc;
1962 bp->b_iodone = cbFunc;
1963 bp->b_vp = b_vp;
1964
1965 }
1966
1967 static void
1968 raidgetdefaultlabel(raidPtr, rs, lp)
1969 RF_Raid_t *raidPtr;
1970 struct raid_softc *rs;
1971 struct disklabel *lp;
1972 {
1973 db1_printf(("Building a default label...\n"));
1974 memset(lp, 0, sizeof(*lp));
1975
1976 /* fabricate a label... */
1977 lp->d_secperunit = raidPtr->totalSectors;
1978 lp->d_secsize = raidPtr->bytesPerSector;
1979 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1980 lp->d_ntracks = 4 * raidPtr->numCol;
1981 lp->d_ncylinders = raidPtr->totalSectors /
1982 (lp->d_nsectors * lp->d_ntracks);
1983 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1984
1985 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1986 lp->d_type = DTYPE_RAID;
1987 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1988 lp->d_rpm = 3600;
1989 lp->d_interleave = 1;
1990 lp->d_flags = 0;
1991
1992 lp->d_partitions[RAW_PART].p_offset = 0;
1993 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1994 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1995 lp->d_npartitions = RAW_PART + 1;
1996
1997 lp->d_magic = DISKMAGIC;
1998 lp->d_magic2 = DISKMAGIC;
1999 	lp->d_checksum = dkcksum(lp);
2000
2001 }
2002 /*
2003 * Read the disklabel from the raid device. If one is not present, fake one
2004 * up.
2005 */
2006 static void
2007 raidgetdisklabel(dev)
2008 dev_t dev;
2009 {
2010 int unit = raidunit(dev);
2011 struct raid_softc *rs = &raid_softc[unit];
2012 char *errstring;
2013 struct disklabel *lp = rs->sc_dkdev.dk_label;
2014 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2015 RF_Raid_t *raidPtr;
2016
2017 db1_printf(("Getting the disklabel...\n"));
2018
2019 memset(clp, 0, sizeof(*clp));
2020
2021 raidPtr = raidPtrs[unit];
2022
2023 raidgetdefaultlabel(raidPtr, rs, lp);
2024
2025 /*
2026 * Call the generic disklabel extraction routine.
2027 */
2028 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2029 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2030 if (errstring)
2031 raidmakedisklabel(rs);
2032 else {
2033 int i;
2034 struct partition *pp;
2035
2036 		/*
2037 		 * Sanity check whether the found disklabel is valid.
2038 		 *
2039 		 * This is necessary since the total size of the raid device
2040 		 * may vary when the interleave is changed, even though exactly
2041 		 * the same components are used, and the old disklabel may be
2042 		 * used if one is found.
2043 		 */
2044 if (lp->d_secperunit != rs->sc_size)
2045 printf("raid%d: WARNING: %s: "
2046 			    "total number of sectors in disklabel (%d) != "
2047 "the size of raid (%ld)\n", unit, rs->sc_xname,
2048 lp->d_secperunit, (long) rs->sc_size);
2049 for (i = 0; i < lp->d_npartitions; i++) {
2050 pp = &lp->d_partitions[i];
2051 if (pp->p_offset + pp->p_size > rs->sc_size)
2052 printf("raid%d: WARNING: %s: end of partition `%c' "
2053 "exceeds the size of raid (%ld)\n",
2054 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2055 }
2056 }
2057
2058 }
2059 /*
2060 * Take care of things one might want to take care of in the event
2061 * that a disklabel isn't present.
2062 */
2063 static void
2064 raidmakedisklabel(rs)
2065 struct raid_softc *rs;
2066 {
2067 struct disklabel *lp = rs->sc_dkdev.dk_label;
2068 db1_printf(("Making a label..\n"));
2069
2070 /*
2071 * For historical reasons, if there's no disklabel present
2072 * the raw partition must be marked FS_BSDFFS.
2073 */
2074
2075 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2076
2077 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2078
2079 lp->d_checksum = dkcksum(lp);
2080 }
2081 /*
2082 * Lookup the provided name in the filesystem. If the file exists,
2083 * is a valid block device, and isn't being used by anyone else,
2084 * set *vpp to the file's vnode.
2085 * You'll find the original of this in ccd.c
2086 */
2087 int
2088 raidlookup(path, p, vpp)
2089 char *path;
2090 struct proc *p;
2091 struct vnode **vpp; /* result */
2092 {
2093 struct nameidata nd;
2094 struct vnode *vp;
2095 struct vattr va;
2096 int error;
2097
2098 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2099 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2100 #if 0
2101 printf("RAIDframe: vn_open returned %d\n", error);
2102 #endif
2103 return (error);
2104 }
2105 vp = nd.ni_vp;
2106 if (vp->v_usecount > 1) {
2107 VOP_UNLOCK(vp, 0);
2108 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2109 return (EBUSY);
2110 }
2111 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2112 VOP_UNLOCK(vp, 0);
2113 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2114 return (error);
2115 }
2116 /* XXX: eventually we should handle VREG, too. */
2117 if (va.va_type != VBLK) {
2118 VOP_UNLOCK(vp, 0);
2119 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2120 return (ENOTBLK);
2121 }
2122 VOP_UNLOCK(vp, 0);
2123 *vpp = vp;
2124 return (0);
2125 }
2126 /*
2127 * Wait interruptibly for an exclusive lock.
2128 *
2129 * XXX
2130 * Several drivers do this; it should be abstracted and made MP-safe.
2131 * (Hmm... where have we seen this warning before :-> GO )
2132 */
2133 static int
2134 raidlock(rs)
2135 struct raid_softc *rs;
2136 {
2137 int error;
2138
2139 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2140 rs->sc_flags |= RAIDF_WANTED;
2141 if ((error =
2142 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2143 return (error);
2144 }
2145 rs->sc_flags |= RAIDF_LOCKED;
2146 return (0);
2147 }
2148 /*
2149 * Unlock and wake up any waiters.
2150 */
2151 static void
2152 raidunlock(rs)
2153 struct raid_softc *rs;
2154 {
2155
2156 rs->sc_flags &= ~RAIDF_LOCKED;
2157 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2158 rs->sc_flags &= ~RAIDF_WANTED;
2159 wakeup(rs);
2160 }
2161 }
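
/*
 * A sketch of the intended usage pattern (not a call site copied from
 * this driver): callers take the lock around anything that reconfigures
 * the softc, and always drop it again on the way out.
 *
 *	if ((error = raidlock(rs)) != 0)
 *		return (error);
 *	... modify rs ...
 *	raidunlock(rs);
 */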
2162
2163
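/*
 * The component label is kept RF_COMPONENT_INFO_OFFSET bytes into each
 * component, in an RF_COMPONENT_INFO_SIZE byte region.  This is assumed
 * to lie within the sectors RAIDframe reserves at the front of every
 * component (see the rf_protected_sectors note below).
 */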
2164 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2165 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2166
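/*
 * Read the component label from the given component, set the clean bit
 * and the supplied mod_counter, and write the label back.  Used once
 * the parity on the set is known to be good.
 */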
2167 int
2168 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2169 {
2170 RF_ComponentLabel_t clabel;
2171 raidread_component_label(dev, b_vp, &clabel);
2172 clabel.mod_counter = mod_counter;
2173 clabel.clean = RF_RAID_CLEAN;
2174 raidwrite_component_label(dev, b_vp, &clabel);
2175 return(0);
2176 }
2177
2178
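/*
 * As above, but clear the clean bit.  The label stays marked dirty
 * until the parity has been verified and the label written clean again.
 */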
2179 int
2180 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2181 {
2182 RF_ComponentLabel_t clabel;
2183 raidread_component_label(dev, b_vp, &clabel);
2184 clabel.mod_counter = mod_counter;
2185 clabel.clean = RF_RAID_DIRTY;
2186 raidwrite_component_label(dev, b_vp, &clabel);
2187 return(0);
2188 }
2189
2190 /* ARGSUSED */
2191 int
2192 raidread_component_label(dev, b_vp, clabel)
2193 dev_t dev;
2194 struct vnode *b_vp;
2195 RF_ComponentLabel_t *clabel;
2196 {
2197 struct buf *bp;
2198 const struct bdevsw *bdev;
2199 int error;
2200
2201 /* XXX should probably ensure that we don't try to do this if
2202 someone has changed rf_protected_sectors. */
2203
2204 if (b_vp == NULL) {
2205 /* For whatever reason, this component is not valid.
2206 Don't try to read a component label from it. */
2207 return(EINVAL);
2208 }
2209
2210 /* get a block of the appropriate size... */
2211 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2212 bp->b_dev = dev;
2213
2214 /* get our ducks in a row for the read */
2215 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2216 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2217 bp->b_flags |= B_READ;
2218 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2219
2220 	bdev = bdevsw_lookup(bp->b_dev);
2221 	if (bdev == NULL) {
		brelse(bp);	/* release the buffer before bailing out */
2222 		return (ENXIO);
	}
2223 (*bdev->d_strategy)(bp);
2224
2225 error = biowait(bp);
2226
2227 if (!error) {
2228 memcpy(clabel, bp->b_data,
2229 sizeof(RF_ComponentLabel_t));
2230 #if 0
2231 rf_print_component_label( clabel );
2232 #endif
2233 } else {
2234 #if 0
2235 printf("Failed to read RAID component label!\n");
2236 #endif
2237 }
2238
2239 brelse(bp);
2240 return(error);
2241 }
2242 /* ARGSUSED */
2243 int
2244 raidwrite_component_label(dev, b_vp, clabel)
2245 dev_t dev;
2246 struct vnode *b_vp;
2247 RF_ComponentLabel_t *clabel;
2248 {
2249 struct buf *bp;
2250 const struct bdevsw *bdev;
2251 int error;
2252
2253 /* get a block of the appropriate size... */
2254 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2255 bp->b_dev = dev;
2256
2257 /* get our ducks in a row for the write */
2258 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2259 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2260 bp->b_flags |= B_WRITE;
2261 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2262
2263 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2264
2265 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2266
2267 	bdev = bdevsw_lookup(bp->b_dev);
2268 	if (bdev == NULL) {
		brelse(bp);	/* release the buffer before bailing out */
2269 		return (ENXIO);
	}
2270 (*bdev->d_strategy)(bp);
2271 error = biowait(bp);
2272 brelse(bp);
2273 if (error) {
2274 #if 1
2275 printf("Failed to write RAID component info!\n");
2276 #endif
2277 }
2278
2279 return(error);
2280 }
2281
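/*
 * Bump the modification counter and mark the component label of every
 * non-failed component dirty.  Spared components are left untouched
 * for now.
 */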
2282 void
2283 rf_markalldirty(raidPtr)
2284 RF_Raid_t *raidPtr;
2285 {
2286 RF_ComponentLabel_t clabel;
2287 int r,c;
2288
2289 raidPtr->mod_counter++;
2290 for (r = 0; r < raidPtr->numRow; r++) {
2291 for (c = 0; c < raidPtr->numCol; c++) {
2292 /* we don't want to touch (at all) a disk that has
2293 failed */
2294 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2295 raidread_component_label(
2296 raidPtr->Disks[r][c].dev,
2297 raidPtr->raid_cinfo[r][c].ci_vp,
2298 &clabel);
2299 if (clabel.status == rf_ds_spared) {
2300 /* XXX do something special...
2301 but whatever you do, don't
2302 try to access it!! */
2303 } else {
2304 #if 0
2305 clabel.status =
2306 raidPtr->Disks[r][c].status;
2307 raidwrite_component_label(
2308 raidPtr->Disks[r][c].dev,
2309 raidPtr->raid_cinfo[r][c].ci_vp,
2310 &clabel);
2311 #endif
2312 raidmarkdirty(
2313 raidPtr->Disks[r][c].dev,
2314 raidPtr->raid_cinfo[r][c].ci_vp,
2315 raidPtr->mod_counter);
2316 }
2317 }
2318 }
2319 }
2320 /* printf("Component labels marked dirty.\n"); */
2321 #if 0
2322 for( c = 0; c < raidPtr->numSpare ; c++) {
2323 sparecol = raidPtr->numCol + c;
2324 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2325 /*
2326
2327 XXX this is where we get fancy and map this spare
2328 		   into its correct spot in the array.
2329
2330 */
2331 /*
2332
2333 we claim this disk is "optimal" if it's
2334 rf_ds_used_spare, as that means it should be
2335 directly substitutable for the disk it replaced.
2336 We note that too...
2337
2338 */
2339
2340 for(i=0;i<raidPtr->numRow;i++) {
2341 for(j=0;j<raidPtr->numCol;j++) {
2342 if ((raidPtr->Disks[i][j].spareRow ==
2343 r) &&
2344 (raidPtr->Disks[i][j].spareCol ==
2345 sparecol)) {
2346 srow = r;
2347 scol = sparecol;
2348 break;
2349 }
2350 }
2351 }
2352
2353 raidread_component_label(
2354 raidPtr->Disks[r][sparecol].dev,
2355 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2356 &clabel);
2357 /* make sure status is noted */
2358 clabel.version = RF_COMPONENT_LABEL_VERSION;
2359 clabel.mod_counter = raidPtr->mod_counter;
2360 clabel.serial_number = raidPtr->serial_number;
2361 clabel.row = srow;
2362 clabel.column = scol;
2363 clabel.num_rows = raidPtr->numRow;
2364 clabel.num_columns = raidPtr->numCol;
2365 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2366 clabel.status = rf_ds_optimal;
2367 raidwrite_component_label(
2368 raidPtr->Disks[r][sparecol].dev,
2369 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2370 &clabel);
2371 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2372 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2373 }
2374 }
2375
2376 #endif
2377 }
2378
2379
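/*
 * Push the current modification counter and status out to the labels of
 * all optimal components and any in-use spares.  If this is the final
 * update and the parity is known to be good, the labels are also marked
 * clean.
 */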
2380 void
2381 rf_update_component_labels(raidPtr, final)
2382 RF_Raid_t *raidPtr;
2383 int final;
2384 {
2385 RF_ComponentLabel_t clabel;
2386 int sparecol;
2387 int r,c;
2388 int i,j;
2389 int srow, scol;
2390
2391 srow = -1;
2392 scol = -1;
2393
2394 /* XXX should do extra checks to make sure things really are clean,
2395 rather than blindly setting the clean bit... */
2396
2397 raidPtr->mod_counter++;
2398
2399 for (r = 0; r < raidPtr->numRow; r++) {
2400 for (c = 0; c < raidPtr->numCol; c++) {
2401 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2402 raidread_component_label(
2403 raidPtr->Disks[r][c].dev,
2404 raidPtr->raid_cinfo[r][c].ci_vp,
2405 &clabel);
2406 /* make sure status is noted */
2407 clabel.status = rf_ds_optimal;
2408 /* bump the counter */
2409 clabel.mod_counter = raidPtr->mod_counter;
2410
2411 raidwrite_component_label(
2412 raidPtr->Disks[r][c].dev,
2413 raidPtr->raid_cinfo[r][c].ci_vp,
2414 &clabel);
2415 if (final == RF_FINAL_COMPONENT_UPDATE) {
2416 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2417 raidmarkclean(
2418 raidPtr->Disks[r][c].dev,
2419 raidPtr->raid_cinfo[r][c].ci_vp,
2420 raidPtr->mod_counter);
2421 }
2422 }
2423 }
2424 /* else we don't touch it.. */
2425 }
2426 }
2427
2428 for( c = 0; c < raidPtr->numSpare ; c++) {
2429 sparecol = raidPtr->numCol + c;
2430 /* Need to ensure that the reconstruct actually completed! */
2431 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2432 /*
2433
2434 we claim this disk is "optimal" if it's
2435 rf_ds_used_spare, as that means it should be
2436 directly substitutable for the disk it replaced.
2437 We note that too...
2438
2439 */
2440
2441 for(i=0;i<raidPtr->numRow;i++) {
2442 for(j=0;j<raidPtr->numCol;j++) {
2443 if ((raidPtr->Disks[i][j].spareRow ==
2444 0) &&
2445 (raidPtr->Disks[i][j].spareCol ==
2446 sparecol)) {
2447 srow = i;
2448 scol = j;
2449 break;
2450 }
2451 }
2452 }
2453
2454 /* XXX shouldn't *really* need this... */
2455 raidread_component_label(
2456 raidPtr->Disks[0][sparecol].dev,
2457 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2458 &clabel);
2459 /* make sure status is noted */
2460
2461 raid_init_component_label(raidPtr, &clabel);
2462
2463 clabel.mod_counter = raidPtr->mod_counter;
2464 clabel.row = srow;
2465 clabel.column = scol;
2466 clabel.status = rf_ds_optimal;
2467
2468 raidwrite_component_label(
2469 raidPtr->Disks[0][sparecol].dev,
2470 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2471 &clabel);
2472 if (final == RF_FINAL_COMPONENT_UPDATE) {
2473 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2474 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2475 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2476 raidPtr->mod_counter);
2477 }
2478 }
2479 }
2480 }
2481 /* printf("Component labels updated\n"); */
2482 }
2483
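/*
 * Close the vnode for a single component.  Auto-configured components
 * were opened with NOCRED, so they are closed with VOP_CLOSE()/vput();
 * everything else goes through vn_close() using the credentials of the
 * engine thread.
 */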
2484 void
2485 rf_close_component(raidPtr, vp, auto_configured)
2486 RF_Raid_t *raidPtr;
2487 struct vnode *vp;
2488 int auto_configured;
2489 {
2490 struct proc *p;
2491
2492 p = raidPtr->engine_thread;
2493
2494 if (vp != NULL) {
2495 if (auto_configured == 1) {
2496 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2497 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2498 vput(vp);
2499
2500 } else {
2501 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2502 }
2503 } else {
2504 #if 0
2505 printf("vnode was NULL\n");
2506 #endif
2507 }
2508 }
2509
2510
2511 void
2512 rf_UnconfigureVnodes(raidPtr)
2513 RF_Raid_t *raidPtr;
2514 {
2515 int r,c;
2516 struct proc *p;
2517 struct vnode *vp;
2518 int acd;
2519
2520
2521 /* We take this opportunity to close the vnodes like we should.. */
2522
2523 p = raidPtr->engine_thread;
2524
2525 for (r = 0; r < raidPtr->numRow; r++) {
2526 for (c = 0; c < raidPtr->numCol; c++) {
2527 #if 0
2528 printf("raid%d: Closing vnode for row: %d col: %d\n",
2529 raidPtr->raidid, r, c);
2530 #endif
2531 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2532 acd = raidPtr->Disks[r][c].auto_configured;
2533 rf_close_component(raidPtr, vp, acd);
2534 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2535 raidPtr->Disks[r][c].auto_configured = 0;
2536 }
2537 }
2538 for (r = 0; r < raidPtr->numSpare; r++) {
2539 #if 0
2540 printf("raid%d: Closing vnode for spare: %d\n",
2541 raidPtr->raidid, r);
2542 #endif
2543 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2544 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2545 rf_close_component(raidPtr, vp, acd);
2546 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2547 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2548 }
2549 }
2550
2551
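/*
 * Kernel thread body: mark the indicated component as failed, optionally
 * kicking off reconstruction to a spare (RF_FDFLAGS_RECON), then exit.
 */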
2552 void
2553 rf_ReconThread(req)
2554 struct rf_recon_req *req;
2555 {
2556 int s;
2557 RF_Raid_t *raidPtr;
2558
2559 s = splbio();
2560 raidPtr = (RF_Raid_t *) req->raidPtr;
2561 raidPtr->recon_in_progress = 1;
2562
2563 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2564 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2565
2566 /* XXX get rid of this! we don't need it at all.. */
2567 RF_Free(req, sizeof(*req));
2568
2569 raidPtr->recon_in_progress = 0;
2570 splx(s);
2571
2572 /* That's all... */
2573 kthread_exit(0); /* does not return */
2574 }
2575
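/*
 * Kernel thread body: rewrite the parity on the array.  On success the
 * parity is flagged as clean; anyone waiting in a shutdown is woken up
 * once we are done.
 */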
2576 void
2577 rf_RewriteParityThread(raidPtr)
2578 RF_Raid_t *raidPtr;
2579 {
2580 int retcode;
2581 int s;
2582
2583 raidPtr->parity_rewrite_in_progress = 1;
2584 s = splbio();
2585 retcode = rf_RewriteParity(raidPtr);
2586 splx(s);
2587 if (retcode) {
2588 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2589 } else {
2590 /* set the clean bit! If we shutdown correctly,
2591 the clean bit on each component label will get
2592 set */
2593 raidPtr->parity_good = RF_RAID_CLEAN;
2594 }
2595 raidPtr->parity_rewrite_in_progress = 0;
2596
2597 /* Anyone waiting for us to stop? If so, inform them... */
2598 if (raidPtr->waitShutdown) {
2599 wakeup(&raidPtr->parity_rewrite_in_progress);
2600 }
2601
2602 /* That's all... */
2603 kthread_exit(0); /* does not return */
2604 }
2605
2606
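/*
 * Kernel thread body: run the copyback operation, migrating
 * reconstructed data back into its proper place in the array.
 */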
2607 void
2608 rf_CopybackThread(raidPtr)
2609 RF_Raid_t *raidPtr;
2610 {
2611 int s;
2612
2613 raidPtr->copyback_in_progress = 1;
2614 s = splbio();
2615 rf_CopybackReconstructedData(raidPtr);
2616 splx(s);
2617 raidPtr->copyback_in_progress = 0;
2618
2619 /* That's all... */
2620 kthread_exit(0); /* does not return */
2621 }
2622
2623
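/*
 * Kernel thread body: reconstruct the indicated component "in place",
 * i.e. back onto the same device rather than onto a spare.
 */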
2624 void
2625 rf_ReconstructInPlaceThread(req)
2626 struct rf_recon_req *req;
2627 {
2628 int retcode;
2629 int s;
2630 RF_Raid_t *raidPtr;
2631
2632 s = splbio();
2633 raidPtr = req->raidPtr;
2634 raidPtr->recon_in_progress = 1;
2635 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2636 RF_Free(req, sizeof(*req));
2637 raidPtr->recon_in_progress = 0;
2638 splx(s);
2639
2640 /* That's all... */
2641 kthread_exit(0); /* does not return */
2642 }
2643
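/*
 * Scan every disk device in the system for partitions of type FS_RAID,
 * read the component label from each one, and return a list of the
 * plausible-looking components.
 *
 * Roughly, the autoconfiguration routines below are meant to be used
 * like this (a sketch only; the real callers elsewhere in this driver
 * also handle root selection and the various error cases):
 *
 *	ac_list = rf_find_raid_components();
 *	config_sets = rf_create_auto_sets(ac_list);
 *	for each cset on config_sets:
 *		if (rf_have_enough_components(cset) &&
 *		    cset->ac->clabel->autoconfigure)
 *			rf_auto_config_set(cset, &unit);
 *		else
 *			rf_release_all_vps(cset);
 *		rf_cleanup_config_set(cset);
 */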
2644 RF_AutoConfig_t *
2645 rf_find_raid_components()
2646 {
2647 struct vnode *vp;
2648 struct disklabel label;
2649 struct device *dv;
2650 dev_t dev;
2651 int bmajor;
2652 int error;
2653 int i;
2654 int good_one;
2655 RF_ComponentLabel_t *clabel;
2656 RF_AutoConfig_t *ac_list;
2657 RF_AutoConfig_t *ac;
2658
2659
2660 /* initialize the AutoConfig list */
2661 ac_list = NULL;
2662
2663 /* we begin by trolling through *all* the devices on the system */
2664
2665 for (dv = alldevs.tqh_first; dv != NULL;
2666 dv = dv->dv_list.tqe_next) {
2667
2668 /* we are only interested in disks... */
2669 if (dv->dv_class != DV_DISK)
2670 continue;
2671
2672 /* we don't care about floppies... */
2673 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2674 continue;
2675 }
2676
2677 /* we don't care about CD's... */
2678 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"cd")) {
2679 continue;
2680 }
2681
2682 /* hdfd is the Atari/Hades floppy driver */
2683 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
2684 continue;
2685 }
2686 /* fdisa is the Atari/Milan floppy driver */
2687 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fdisa")) {
2688 continue;
2689 }
2690
2691 /* need to find the device_name_to_block_device_major stuff */
2692 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2693
2694 /* get a vnode for the raw partition of this disk */
2695
2696 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2697 if (bdevvp(dev, &vp))
2698 panic("RAID can't alloc vnode");
2699
2700 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2701
2702 if (error) {
2703 			/* "Who cares."  Continue looking
2704 			   for something that exists. */
2705 vput(vp);
2706 continue;
2707 }
2708
2709 /* Ok, the disk exists. Go get the disklabel. */
2710 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2711 FREAD, NOCRED, 0);
2712 if (error) {
2713 /*
2714 * XXX can't happen - open() would
2715 * have errored out (or faked up one)
2716 */
2717 printf("can't get label for dev %s%c (%d)!?!?\n",
2718 dv->dv_xname, 'a' + RAW_PART, error);
2719 }
2720
2721 /* don't need this any more. We'll allocate it again
2722 a little later if we really do... */
2723 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2724 		VOP_CLOSE(vp, FREAD, NOCRED, 0);	/* opened FREAD above */
2725 vput(vp);
2726
2727 for (i=0; i < label.d_npartitions; i++) {
2728 /* We only support partitions marked as RAID */
2729 if (label.d_partitions[i].p_fstype != FS_RAID)
2730 continue;
2731
2732 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2733 if (bdevvp(dev, &vp))
2734 panic("RAID can't alloc vnode");
2735
2736 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2737 if (error) {
2738 /* Whatever... */
2739 vput(vp);
2740 continue;
2741 }
2742
2743 good_one = 0;
2744
2745 clabel = (RF_ComponentLabel_t *)
2746 malloc(sizeof(RF_ComponentLabel_t),
2747 M_RAIDFRAME, M_NOWAIT);
2748 if (clabel == NULL) {
2749 /* XXX CLEANUP HERE */
2750 printf("RAID auto config: out of memory!\n");
2751 return(NULL); /* XXX probably should panic? */
2752 }
2753
2754 if (!raidread_component_label(dev, vp, clabel)) {
2755 /* Got the label. Does it look reasonable? */
2756 if (rf_reasonable_label(clabel) &&
2757 (clabel->partitionSize <=
2758 label.d_partitions[i].p_size)) {
2759 #if DEBUG
2760 printf("Component on: %s%c: %d\n",
2761 dv->dv_xname, 'a'+i,
2762 label.d_partitions[i].p_size);
2763 rf_print_component_label(clabel);
2764 #endif
2765 /* if it's reasonable, add it,
2766 else ignore it. */
2767 ac = (RF_AutoConfig_t *)
2768 malloc(sizeof(RF_AutoConfig_t),
2769 M_RAIDFRAME,
2770 M_NOWAIT);
2771 if (ac == NULL) {
2772 /* XXX should panic?? */
2773 return(NULL);
2774 }
2775
2776 sprintf(ac->devname, "%s%c",
2777 dv->dv_xname, 'a'+i);
2778 ac->dev = dev;
2779 ac->vp = vp;
2780 ac->clabel = clabel;
2781 ac->next = ac_list;
2782 ac_list = ac;
2783 good_one = 1;
2784 }
2785 }
2786 if (!good_one) {
2787 /* cleanup */
2788 free(clabel, M_RAIDFRAME);
2789 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2790 				VOP_CLOSE(vp, FREAD, NOCRED, 0);	/* opened FREAD above */
2791 vput(vp);
2792 }
2793 }
2794 }
2795 return(ac_list);
2796 }
2797
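/*
 * Sanity check a component label: known version, a sensible clean flag,
 * and row/column/size values that are self-consistent.
 */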
2798 static int
2799 rf_reasonable_label(clabel)
2800 RF_ComponentLabel_t *clabel;
2801 {
2802
2803 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2804 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2805 ((clabel->clean == RF_RAID_CLEAN) ||
2806 (clabel->clean == RF_RAID_DIRTY)) &&
2807 clabel->row >=0 &&
2808 clabel->column >= 0 &&
2809 clabel->num_rows > 0 &&
2810 clabel->num_columns > 0 &&
2811 clabel->row < clabel->num_rows &&
2812 clabel->column < clabel->num_columns &&
2813 clabel->blockSize > 0 &&
2814 clabel->numBlocks > 0) {
2815 /* label looks reasonable enough... */
2816 return(1);
2817 }
2818 return(0);
2819 }
2820
2821
2822 #if 0
2823 void
2824 rf_print_component_label(clabel)
2825 RF_ComponentLabel_t *clabel;
2826 {
2827 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2828 clabel->row, clabel->column,
2829 clabel->num_rows, clabel->num_columns);
2830 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2831 clabel->version, clabel->serial_number,
2832 clabel->mod_counter);
2833 printf(" Clean: %s Status: %d\n",
2834 clabel->clean ? "Yes" : "No", clabel->status );
2835 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2836 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2837 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2838 (char) clabel->parityConfig, clabel->blockSize,
2839 clabel->numBlocks);
2840 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2841 printf(" Contains root partition: %s\n",
2842 clabel->root_partition ? "Yes" : "No" );
2843 printf(" Last configured as: raid%d\n", clabel->last_unit );
2844 #if 0
2845 printf(" Config order: %d\n", clabel->config_order);
2846 #endif
2847
2848 }
2849 #endif
2850
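/*
 * Sort the list of discovered components into configuration sets;
 * components whose labels are consistent with one another (see
 * rf_does_it_fit()) end up in the same set.
 */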
2851 RF_ConfigSet_t *
2852 rf_create_auto_sets(ac_list)
2853 RF_AutoConfig_t *ac_list;
2854 {
2855 RF_AutoConfig_t *ac;
2856 RF_ConfigSet_t *config_sets;
2857 RF_ConfigSet_t *cset;
2858 RF_AutoConfig_t *ac_next;
2859
2860
2861 config_sets = NULL;
2862
2863 /* Go through the AutoConfig list, and figure out which components
2864 belong to what sets. */
2865 ac = ac_list;
2866 while(ac!=NULL) {
2867 /* we're going to putz with ac->next, so save it here
2868 for use at the end of the loop */
2869 ac_next = ac->next;
2870
2871 if (config_sets == NULL) {
2872 /* will need at least this one... */
2873 config_sets = (RF_ConfigSet_t *)
2874 malloc(sizeof(RF_ConfigSet_t),
2875 M_RAIDFRAME, M_NOWAIT);
2876 if (config_sets == NULL) {
2877 panic("rf_create_auto_sets: No memory!\n");
2878 }
2879 /* this one is easy :) */
2880 config_sets->ac = ac;
2881 config_sets->next = NULL;
2882 config_sets->rootable = 0;
2883 ac->next = NULL;
2884 } else {
2885 /* which set does this component fit into? */
2886 cset = config_sets;
2887 while(cset!=NULL) {
2888 if (rf_does_it_fit(cset, ac)) {
2889 /* looks like it matches... */
2890 ac->next = cset->ac;
2891 cset->ac = ac;
2892 break;
2893 }
2894 cset = cset->next;
2895 }
2896 if (cset==NULL) {
2897 /* didn't find a match above... new set..*/
2898 cset = (RF_ConfigSet_t *)
2899 malloc(sizeof(RF_ConfigSet_t),
2900 M_RAIDFRAME, M_NOWAIT);
2901 if (cset == NULL) {
2902 panic("rf_create_auto_sets: No memory!\n");
2903 }
2904 cset->ac = ac;
2905 ac->next = NULL;
2906 cset->next = config_sets;
2907 cset->rootable = 0;
2908 config_sets = cset;
2909 }
2910 }
2911 ac = ac_next;
2912 }
2913
2914
2915 return(config_sets);
2916 }
2917
2918 static int
2919 rf_does_it_fit(cset, ac)
2920 RF_ConfigSet_t *cset;
2921 RF_AutoConfig_t *ac;
2922 {
2923 RF_ComponentLabel_t *clabel1, *clabel2;
2924
2925 /* If this one matches the *first* one in the set, that's good
2926 enough, since the other members of the set would have been
2927 through here too... */
2928 /* note that we are not checking partitionSize here..
2929
2930 Note that we are also not checking the mod_counters here.
2931 	   If everything else matches except the mod_counter, that's
2932 good enough for this test. We will deal with the mod_counters
2933 a little later in the autoconfiguration process.
2934
2935 (clabel1->mod_counter == clabel2->mod_counter) &&
2936
2937 The reason we don't check for this is that failed disks
2938 will have lower modification counts. If those disks are
2939 not added to the set they used to belong to, then they will
2940 form their own set, which may result in 2 different sets,
2941 for example, competing to be configured at raid0, and
2942 perhaps competing to be the root filesystem set. If the
2943 wrong ones get configured, or both attempt to become /,
2944 	   weird behaviour and/or serious lossage will occur.  Thus we
2945 need to bring them into the fold here, and kick them out at
2946 a later point.
2947
2948 */
2949
2950 clabel1 = cset->ac->clabel;
2951 clabel2 = ac->clabel;
2952 if ((clabel1->version == clabel2->version) &&
2953 (clabel1->serial_number == clabel2->serial_number) &&
2954 (clabel1->num_rows == clabel2->num_rows) &&
2955 (clabel1->num_columns == clabel2->num_columns) &&
2956 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2957 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2958 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2959 (clabel1->parityConfig == clabel2->parityConfig) &&
2960 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2961 (clabel1->blockSize == clabel2->blockSize) &&
2962 (clabel1->numBlocks == clabel2->numBlocks) &&
2963 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2964 (clabel1->root_partition == clabel2->root_partition) &&
2965 (clabel1->last_unit == clabel2->last_unit) &&
2966 (clabel1->config_order == clabel2->config_order)) {
2967 		/* if it gets here, it almost *has* to be a match */
2968 } else {
2969 /* it's not consistent with somebody in the set..
2970 punt */
2971 return(0);
2972 }
2973 /* all was fine.. it must fit... */
2974 return(1);
2975 }
2976
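/*
 * Decide whether a configuration set has enough live components to be
 * configured.  RAID 1 gets special treatment: a mirror pair is only
 * considered lost if both halves of the pair are missing.
 */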
2977 int
2978 rf_have_enough_components(cset)
2979 RF_ConfigSet_t *cset;
2980 {
2981 RF_AutoConfig_t *ac;
2982 RF_AutoConfig_t *auto_config;
2983 RF_ComponentLabel_t *clabel;
2984 int r,c;
2985 int num_rows;
2986 int num_cols;
2987 int num_missing;
2988 int mod_counter;
2989 int mod_counter_found;
2990 int even_pair_failed;
2991 char parity_type;
2992
2993
2994 /* check to see that we have enough 'live' components
2995 of this set. If so, we can configure it if necessary */
2996
2997 num_rows = cset->ac->clabel->num_rows;
2998 num_cols = cset->ac->clabel->num_columns;
2999 parity_type = cset->ac->clabel->parityConfig;
3000
3001 /* XXX Check for duplicate components!?!?!? */
3002
3003 /* Determine what the mod_counter is supposed to be for this set. */
3004
3005 mod_counter_found = 0;
3006 mod_counter = 0;
3007 ac = cset->ac;
3008 while(ac!=NULL) {
3009 if (mod_counter_found==0) {
3010 mod_counter = ac->clabel->mod_counter;
3011 mod_counter_found = 1;
3012 } else {
3013 if (ac->clabel->mod_counter > mod_counter) {
3014 mod_counter = ac->clabel->mod_counter;
3015 }
3016 }
3017 ac = ac->next;
3018 }
3019
3020 num_missing = 0;
3021 auto_config = cset->ac;
3022
3023 for(r=0; r<num_rows; r++) {
3024 even_pair_failed = 0;
3025 for(c=0; c<num_cols; c++) {
3026 ac = auto_config;
3027 while(ac!=NULL) {
3028 if ((ac->clabel->row == r) &&
3029 (ac->clabel->column == c) &&
3030 (ac->clabel->mod_counter == mod_counter)) {
3031 /* it's this one... */
3032 #if DEBUG
3033 printf("Found: %s at %d,%d\n",
3034 ac->devname,r,c);
3035 #endif
3036 break;
3037 }
3038 ac=ac->next;
3039 }
3040 if (ac==NULL) {
3041 /* Didn't find one here! */
3042 /* special case for RAID 1, especially
3043 where there are more than 2
3044 components (where RAIDframe treats
3045 things a little differently :( ) */
3046 if (parity_type == '1') {
3047 if (c%2 == 0) { /* even component */
3048 even_pair_failed = 1;
3049 } else { /* odd component. If
3050 we're failed, and
3051 so is the even
3052 component, it's
3053 "Good Night, Charlie" */
3054 if (even_pair_failed == 1) {
3055 return(0);
3056 }
3057 }
3058 } else {
3059 /* normal accounting */
3060 num_missing++;
3061 }
3062 }
3063 if ((parity_type == '1') && (c%2 == 1)) {
3064 				/* Just did the odd component of a pair and
3065 				   didn't bail, so reset the even_pair_failed
3066 				   flag and go on to the next pair.... */
3067 even_pair_failed = 0;
3068 }
3069 }
3070 }
3071
3072 clabel = cset->ac->clabel;
3073
3074 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3075 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3076 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3077 /* XXX this needs to be made *much* more general */
3078 /* Too many failures */
3079 return(0);
3080 }
3081 /* otherwise, all is well, and we've got enough to take a kick
3082 at autoconfiguring this set */
3083 return(1);
3084 }
3085
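/*
 * Build an RF_Config_t from the component labels of an auto-detected
 * set: geometry, layout parameters, and the device name of every
 * component.
 */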
3086 void
3087 rf_create_configuration(ac,config,raidPtr)
3088 RF_AutoConfig_t *ac;
3089 RF_Config_t *config;
3090 RF_Raid_t *raidPtr;
3091 {
3092 RF_ComponentLabel_t *clabel;
3093 int i;
3094
3095 clabel = ac->clabel;
3096
3097 /* 1. Fill in the common stuff */
3098 config->numRow = clabel->num_rows;
3099 config->numCol = clabel->num_columns;
3100 config->numSpare = 0; /* XXX should this be set here? */
3101 config->sectPerSU = clabel->sectPerSU;
3102 config->SUsPerPU = clabel->SUsPerPU;
3103 config->SUsPerRU = clabel->SUsPerRU;
3104 config->parityConfig = clabel->parityConfig;
3105 /* XXX... */
3106 strcpy(config->diskQueueType,"fifo");
3107 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3108 config->layoutSpecificSize = 0; /* XXX ?? */
3109
3110 while(ac!=NULL) {
3111 /* row/col values will be in range due to the checks
3112 in reasonable_label() */
3113 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3114 ac->devname);
3115 ac = ac->next;
3116 }
3117
3118 for(i=0;i<RF_MAXDBGV;i++) {
3119 		config->debugVars[i][0] = '\0';
3120 }
3121 }
3122
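/*
 * Set the autoconfigure flag on the array and write the new value into
 * the component label of every optimal component.
 */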
3123 int
3124 rf_set_autoconfig(raidPtr, new_value)
3125 RF_Raid_t *raidPtr;
3126 int new_value;
3127 {
3128 RF_ComponentLabel_t clabel;
3129 struct vnode *vp;
3130 dev_t dev;
3131 int row, column;
3132
3133 raidPtr->autoconfigure = new_value;
3134 for(row=0; row<raidPtr->numRow; row++) {
3135 for(column=0; column<raidPtr->numCol; column++) {
3136 if (raidPtr->Disks[row][column].status ==
3137 rf_ds_optimal) {
3138 dev = raidPtr->Disks[row][column].dev;
3139 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3140 raidread_component_label(dev, vp, &clabel);
3141 clabel.autoconfigure = new_value;
3142 raidwrite_component_label(dev, vp, &clabel);
3143 }
3144 }
3145 }
3146 return(new_value);
3147 }
3148
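/*
 * As above, but for the "eligible to contain root" flag.
 */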
3149 int
3150 rf_set_rootpartition(raidPtr, new_value)
3151 RF_Raid_t *raidPtr;
3152 int new_value;
3153 {
3154 RF_ComponentLabel_t clabel;
3155 struct vnode *vp;
3156 dev_t dev;
3157 int row, column;
3158
3159 raidPtr->root_partition = new_value;
3160 for(row=0; row<raidPtr->numRow; row++) {
3161 for(column=0; column<raidPtr->numCol; column++) {
3162 if (raidPtr->Disks[row][column].status ==
3163 rf_ds_optimal) {
3164 dev = raidPtr->Disks[row][column].dev;
3165 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3166 raidread_component_label(dev, vp, &clabel);
3167 clabel.root_partition = new_value;
3168 raidwrite_component_label(dev, vp, &clabel);
3169 }
3170 }
3171 }
3172 return(new_value);
3173 }
3174
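/*
 * Close and release the vnodes held by the components of a
 * configuration set.
 */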
3175 void
3176 rf_release_all_vps(cset)
3177 RF_ConfigSet_t *cset;
3178 {
3179 RF_AutoConfig_t *ac;
3180
3181 ac = cset->ac;
3182 while(ac!=NULL) {
3183 /* Close the vp, and give it back */
3184 if (ac->vp) {
3185 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3186 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3187 vput(ac->vp);
3188 ac->vp = NULL;
3189 }
3190 ac = ac->next;
3191 }
3192 }
3193
3194
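/*
 * Free the component labels, the auto-config entries, and finally the
 * configuration set itself.
 */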
3195 void
3196 rf_cleanup_config_set(cset)
3197 RF_ConfigSet_t *cset;
3198 {
3199 RF_AutoConfig_t *ac;
3200 RF_AutoConfig_t *next_ac;
3201
3202 ac = cset->ac;
3203 while(ac!=NULL) {
3204 next_ac = ac->next;
3205 /* nuke the label */
3206 free(ac->clabel, M_RAIDFRAME);
3207 /* cleanup the config structure */
3208 free(ac, M_RAIDFRAME);
3209 /* "next.." */
3210 ac = next_ac;
3211 }
3212 /* and, finally, nuke the config set */
3213 free(cset, M_RAIDFRAME);
3214 }
3215
3216
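/*
 * Fill in a component label from the current state of the array:
 * geometry, serial number, mod counter, and the configuration flags.
 * The clean bit is deliberately left unset here.
 */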
3217 void
3218 raid_init_component_label(raidPtr, clabel)
3219 RF_Raid_t *raidPtr;
3220 RF_ComponentLabel_t *clabel;
3221 {
3222 /* current version number */
3223 clabel->version = RF_COMPONENT_LABEL_VERSION;
3224 clabel->serial_number = raidPtr->serial_number;
3225 clabel->mod_counter = raidPtr->mod_counter;
3226 clabel->num_rows = raidPtr->numRow;
3227 clabel->num_columns = raidPtr->numCol;
3228 clabel->clean = RF_RAID_DIRTY; /* not clean */
3229 clabel->status = rf_ds_optimal; /* "It's good!" */
3230
3231 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3232 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3233 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3234
3235 clabel->blockSize = raidPtr->bytesPerSector;
3236 clabel->numBlocks = raidPtr->sectorsPerDisk;
3237
3238 /* XXX not portable */
3239 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3240 clabel->maxOutstanding = raidPtr->maxOutstanding;
3241 clabel->autoconfigure = raidPtr->autoconfigure;
3242 clabel->root_partition = raidPtr->root_partition;
3243 clabel->last_unit = raidPtr->raidid;
3244 clabel->config_order = raidPtr->config_order;
3245 }
3246
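/*
 * Configure one auto-detected set.  The unit number recorded in the
 * component labels is used if it is free; otherwise the highest-numbered
 * free unit is taken.  On success, returns 0 and sets *unit to the raid
 * unit that was configured.
 */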
3247 int
3248 rf_auto_config_set(cset,unit)
3249 RF_ConfigSet_t *cset;
3250 int *unit;
3251 {
3252 RF_Raid_t *raidPtr;
3253 RF_Config_t *config;
3254 int raidID;
3255 int retcode;
3256
3257 #if DEBUG
3258 printf("RAID autoconfigure\n");
3259 #endif
3260
3261 retcode = 0;
3262 *unit = -1;
3263
3264 /* 1. Create a config structure */
3265
3266 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3267 M_RAIDFRAME,
3268 M_NOWAIT);
3269 if (config==NULL) {
3270 printf("Out of mem!?!?\n");
3271 /* XXX do something more intelligent here. */
3272 return(1);
3273 }
3274
3275 memset(config, 0, sizeof(RF_Config_t));
3276
3277 /*
3278 	   2. Figure out what RAID ID this one is supposed to live at.
3279 See if we can get the same RAID dev that it was configured
3280 on last time..
3281 */
3282
3283 raidID = cset->ac->clabel->last_unit;
3284 if ((raidID < 0) || (raidID >= numraid)) {
3285 /* let's not wander off into lala land. */
3286 raidID = numraid - 1;
3287 }
3288 if (raidPtrs[raidID]->valid != 0) {
3289
3290 /*
3291 Nope... Go looking for an alternative...
3292 Start high so we don't immediately use raid0 if that's
3293 not taken.
3294 */
3295
3296 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3297 if (raidPtrs[raidID]->valid == 0) {
3298 /* can use this one! */
3299 break;
3300 }
3301 }
3302 }
3303
3304 if (raidID < 0) {
3305 /* punt... */
3306 printf("Unable to auto configure this set!\n");
3307 printf("(Out of RAID devs!)\n");
3308 return(1);
3309 }
3310
3311 #if DEBUG
3312 printf("Configuring raid%d:\n",raidID);
3313 #endif
3314
3315 raidPtr = raidPtrs[raidID];
3316
3317 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3318 raidPtr->raidid = raidID;
3319 raidPtr->openings = RAIDOUTSTANDING;
3320
3321 /* 3. Build the configuration structure */
3322 rf_create_configuration(cset->ac, config, raidPtr);
3323
3324 /* 4. Do the configuration */
3325 retcode = rf_Configure(raidPtr, config, cset->ac);
3326
3327 if (retcode == 0) {
3328
3329 raidinit(raidPtrs[raidID]);
3330
3331 rf_markalldirty(raidPtrs[raidID]);
3332 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3333 if (cset->ac->clabel->root_partition==1) {
3334 /* everything configured just fine. Make a note
3335 that this set is eligible to be root. */
3336 cset->rootable = 1;
3337 /* XXX do this here? */
3338 raidPtrs[raidID]->root_partition = 1;
3339 }
3340 }
3341
3342 /* 5. Cleanup */
3343 free(config, M_RAIDFRAME);
3344
3345 *unit = raidID;
3346 return(retcode);
3347 }
3348
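/*
 * Report the completed transfer for this access descriptor to the
 * disk(9) layer so that the upper-level disk statistics stay accurate.
 */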
3349 void
3350 rf_disk_unbusy(desc)
3351 RF_RaidAccessDesc_t *desc;
3352 {
3353 struct buf *bp;
3354
3355 bp = (struct buf *)desc->bp;
3356 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3357 (bp->b_bcount - bp->b_resid));
3358 }
3359