rf_netbsdkintf.c revision 1.132 1 /* $NetBSD: rf_netbsdkintf.c,v 1.132 2002/09/21 01:09:43 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/cdefs.h>
117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.132 2002/09/21 01:09:43 oster Exp $");
118
119 #include <sys/param.h>
120 #include <sys/errno.h>
121 #include <sys/pool.h>
122 #include <sys/queue.h>
123 #include <sys/disk.h>
124 #include <sys/device.h>
125 #include <sys/stat.h>
126 #include <sys/ioctl.h>
127 #include <sys/fcntl.h>
128 #include <sys/systm.h>
129 #include <sys/namei.h>
130 #include <sys/vnode.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include <dev/raidframe/raidframevar.h>
139 #include <dev/raidframe/raidframeio.h>
140 #include "raid.h"
141 #include "opt_raid_autoconfig.h"
142 #include "rf_raid.h"
143 #include "rf_copyback.h"
144 #include "rf_dag.h"
145 #include "rf_dagflags.h"
146 #include "rf_desc.h"
147 #include "rf_diskqueue.h"
148 #include "rf_etimer.h"
149 #include "rf_general.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_threadstuff.h"
155
/* Debug verbosity; db1_printf() output is produced only when this is > 0. */
int rf_kdebug_level = 0;

/*
 * db1_printf((fmt, ...)) -- level-1 debug printf.
 *
 * The argument is a complete, parenthesized printf argument list, hence
 * the double parentheses at every call site.  With DEBUG defined the
 * message is printed when rf_kdebug_level > 0; otherwise the macro
 * expands to nothing.
 *
 * Both variants are wrapped in do { } while (0) so that the macro acts
 * as exactly one statement: it cannot capture a following `else', and
 * "if (x) db1_printf((...)); else ..." compiles in either configuration.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else /* DEBUG */
#define db1_printf(a) do { } while (0)
#endif /* DEBUG */
163
164 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
165
166 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
167
168 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
169 * spare table */
170 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
171 * installation process */
172
173 /* prototypes */
174 static void KernelWakeupFunc(struct buf * bp);
175 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
176 dev_t dev, RF_SectorNum_t startSect,
177 RF_SectorCount_t numSect, caddr_t buf,
178 void (*cbFunc) (struct buf *), void *cbArg,
179 int logBytesPerSector, struct proc * b_proc);
180 static void raidinit(RF_Raid_t *);
181
182 void raidattach(int);
183
184 dev_type_open(raidopen);
185 dev_type_close(raidclose);
186 dev_type_read(raidread);
187 dev_type_write(raidwrite);
188 dev_type_ioctl(raidioctl);
189 dev_type_strategy(raidstrategy);
190 dev_type_dump(raiddump);
191 dev_type_size(raidsize);
192
193 const struct bdevsw raid_bdevsw = {
194 raidopen, raidclose, raidstrategy, raidioctl,
195 raiddump, raidsize, D_DISK
196 };
197
198 const struct cdevsw raid_cdevsw = {
199 raidopen, raidclose, raidread, raidwrite, raidioctl,
200 nostop, notty, nopoll, nommap, D_DISK
201 };
202
203 /*
204 * Pilfered from ccd.c
205 */
206
207 struct raidbuf {
208 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
209 struct buf *rf_obp; /* ptr. to original I/O buf */
210 int rf_flags; /* misc. flags */
211 RF_DiskQueueData_t *req;/* the request that this was part of.. */
212 };
213
214 /* component buffer pool */
215 struct pool raidframe_cbufpool;
216
217 #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
218 #define RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
219
220 /* XXX Not sure if the following should be replacing the raidPtrs above,
221 or if it should be used in conjunction with that...
222 */
223
224 struct raid_softc {
225 int sc_flags; /* flags */
226 int sc_cflags; /* configuration flags */
227 size_t sc_size; /* size of the raid device */
228 char sc_xname[20]; /* XXX external name */
229 struct disk sc_dkdev; /* generic disk device info */
230 struct bufq_state buf_queue; /* used for the device queue */
231 };
232 /* sc_flags */
233 #define RAIDF_INITED 0x01 /* unit has been initialized */
234 #define RAIDF_WLABEL 0x02 /* label area is writable */
235 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
236 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
237 #define RAIDF_LOCKED 0x80 /* unit is locked */
238
239 #define raidunit(x) DISKUNIT(x)
240 int numraid = 0;
241
242 /*
243 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
244 * Be aware that large numbers can allow the driver to consume a lot of
245 * kernel memory, especially on writes, and in degraded mode reads.
246 *
247 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
248 * a single 64K write will typically require 64K for the old data,
249 * 64K for the old parity, and 64K for the new parity, for a total
250 * of 192K (if the parity buffer is not re-used immediately).
251 * Even if it is used immediately, that's still 128K, which when multiplied
252 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
253 *
254 * Now in degraded mode, for example, a 64K read on the above setup may
255 * require data reconstruction, which will require *all* of the 4 remaining
256 * disks to participate -- 4 * 32K/disk == 128K again.
257 */
258
/*
 * Default number of simultaneous I/Os allowed per RAID device; may be
 * overridden by defining RAIDOUTSTANDING before this point.
 */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING 6
#endif

/* The RAW_PART device of the unit that `dev' belongs to. */
#define RAIDLABELDEV(dev) \
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
265
266 /* declared here, and made public, for the benefit of KVM stuff.. */
267 struct raid_softc *raid_softc;
268
269 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
270 struct disklabel *);
271 static void raidgetdisklabel(dev_t);
272 static void raidmakedisklabel(struct raid_softc *);
273
274 static int raidlock(struct raid_softc *);
275 static void raidunlock(struct raid_softc *);
276
277 static void rf_markalldirty(RF_Raid_t *);
278
279 struct device *raidrootdev;
280
281 void rf_ReconThread(struct rf_recon_req *);
282 /* XXX what I want is: */
283 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
284 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
285 void rf_CopybackThread(RF_Raid_t *raidPtr);
286 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
287 void rf_buildroothack(void *);
288
289 RF_AutoConfig_t *rf_find_raid_components(void);
290 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
291 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
292 static int rf_reasonable_label(RF_ComponentLabel_t *);
293 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
294 int rf_set_autoconfig(RF_Raid_t *, int);
295 int rf_set_rootpartition(RF_Raid_t *, int);
296 void rf_release_all_vps(RF_ConfigSet_t *);
297 void rf_cleanup_config_set(RF_ConfigSet_t *);
298 int rf_have_enough_components(RF_ConfigSet_t *);
299 int rf_auto_config_set(RF_ConfigSet_t *, int *);
300
301 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
302 allow autoconfig to take place.
303 Note that this is overridden by having
304 RAID_AUTOCONFIG as an option in the
305 kernel config file. */
306
307 void
308 raidattach(num)
309 int num;
310 {
311 int raidID;
312 int i, rc;
313 RF_AutoConfig_t *ac_list; /* autoconfig list */
314 RF_ConfigSet_t *config_sets;
315
316 #ifdef DEBUG
317 printf("raidattach: Asked for %d units\n", num);
318 #endif
319
320 if (num <= 0) {
321 #ifdef DIAGNOSTIC
322 panic("raidattach: count <= 0");
323 #endif
324 return;
325 }
326 /* This is where all the initialization stuff gets done. */
327
328 numraid = num;
329
330 /* Make some space for requested number of units... */
331
332 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
333 if (raidPtrs == NULL) {
334 panic("raidPtrs is NULL!!\n");
335 }
336
337 /* Initialize the component buffer pool. */
338 pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
339 0, 0, "raidpl", NULL);
340
341 rc = rf_mutex_init(&rf_sparet_wait_mutex);
342 if (rc) {
343 RF_PANIC();
344 }
345
346 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
347
348 for (i = 0; i < num; i++)
349 raidPtrs[i] = NULL;
350 rc = rf_BootRaidframe();
351 if (rc == 0)
352 printf("Kernelized RAIDframe activated\n");
353 else
354 panic("Serious error booting RAID!!\n");
355
356 /* put together some datastructures like the CCD device does.. This
357 * lets us lock the device and what-not when it gets opened. */
358
359 raid_softc = (struct raid_softc *)
360 malloc(num * sizeof(struct raid_softc),
361 M_RAIDFRAME, M_NOWAIT);
362 if (raid_softc == NULL) {
363 printf("WARNING: no memory for RAIDframe driver\n");
364 return;
365 }
366
367 memset(raid_softc, 0, num * sizeof(struct raid_softc));
368
369 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
370 M_RAIDFRAME, M_NOWAIT);
371 if (raidrootdev == NULL) {
372 panic("No memory for RAIDframe driver!!?!?!\n");
373 }
374
375 for (raidID = 0; raidID < num; raidID++) {
376 bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
377
378 raidrootdev[raidID].dv_class = DV_DISK;
379 raidrootdev[raidID].dv_cfdata = NULL;
380 raidrootdev[raidID].dv_unit = raidID;
381 raidrootdev[raidID].dv_parent = NULL;
382 raidrootdev[raidID].dv_flags = 0;
383 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
384
385 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
386 (RF_Raid_t *));
387 if (raidPtrs[raidID] == NULL) {
388 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
389 numraid = raidID;
390 return;
391 }
392 }
393
394 #ifdef RAID_AUTOCONFIG
395 raidautoconfig = 1;
396 #endif
397
398 if (raidautoconfig) {
399 /* 1. locate all RAID components on the system */
400
401 #if DEBUG
402 printf("Searching for raid components...\n");
403 #endif
404 ac_list = rf_find_raid_components();
405
406 /* 2. sort them into their respective sets */
407
408 config_sets = rf_create_auto_sets(ac_list);
409
410 /* 3. evaluate each set and configure the valid ones
411 This gets done in rf_buildroothack() */
412
413 /* schedule the creation of the thread to do the
414 "/ on RAID" stuff */
415
416 kthread_create(rf_buildroothack,config_sets);
417
418 }
419
420 }
421
422 void
423 rf_buildroothack(arg)
424 void *arg;
425 {
426 RF_ConfigSet_t *config_sets = arg;
427 RF_ConfigSet_t *cset;
428 RF_ConfigSet_t *next_cset;
429 int retcode;
430 int raidID;
431 int rootID;
432 int num_root;
433
434 rootID = 0;
435 num_root = 0;
436 cset = config_sets;
437 while(cset != NULL ) {
438 next_cset = cset->next;
439 if (rf_have_enough_components(cset) &&
440 cset->ac->clabel->autoconfigure==1) {
441 retcode = rf_auto_config_set(cset,&raidID);
442 if (!retcode) {
443 if (cset->rootable) {
444 rootID = raidID;
445 num_root++;
446 }
447 } else {
448 /* The autoconfig didn't work :( */
449 #if DEBUG
450 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
451 #endif
452 rf_release_all_vps(cset);
453 }
454 } else {
455 /* we're not autoconfiguring this set...
456 release the associated resources */
457 rf_release_all_vps(cset);
458 }
459 /* cleanup */
460 rf_cleanup_config_set(cset);
461 cset = next_cset;
462 }
463
464 /* we found something bootable... */
465
466 if (num_root == 1) {
467 booted_device = &raidrootdev[rootID];
468 } else if (num_root > 1) {
469 /* we can't guess.. require the user to answer... */
470 boothowto |= RB_ASKNAME;
471 }
472 }
473
474
475 int
476 raidsize(dev)
477 dev_t dev;
478 {
479 struct raid_softc *rs;
480 struct disklabel *lp;
481 int part, unit, omask, size;
482
483 unit = raidunit(dev);
484 if (unit >= numraid)
485 return (-1);
486 rs = &raid_softc[unit];
487
488 if ((rs->sc_flags & RAIDF_INITED) == 0)
489 return (-1);
490
491 part = DISKPART(dev);
492 omask = rs->sc_dkdev.dk_openmask & (1 << part);
493 lp = rs->sc_dkdev.dk_label;
494
495 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
496 return (-1);
497
498 if (lp->d_partitions[part].p_fstype != FS_SWAP)
499 size = -1;
500 else
501 size = lp->d_partitions[part].p_size *
502 (lp->d_secsize / DEV_BSIZE);
503
504 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
505 return (-1);
506
507 return (size);
508
509 }
510
511 int
512 raiddump(dev, blkno, va, size)
513 dev_t dev;
514 daddr_t blkno;
515 caddr_t va;
516 size_t size;
517 {
518 /* Not implemented. */
519 return ENXIO;
520 }
521 /* ARGSUSED */
522 int
523 raidopen(dev, flags, fmt, p)
524 dev_t dev;
525 int flags, fmt;
526 struct proc *p;
527 {
528 int unit = raidunit(dev);
529 struct raid_softc *rs;
530 struct disklabel *lp;
531 int part, pmask;
532 int error = 0;
533
534 if (unit >= numraid)
535 return (ENXIO);
536 rs = &raid_softc[unit];
537
538 if ((error = raidlock(rs)) != 0)
539 return (error);
540 lp = rs->sc_dkdev.dk_label;
541
542 part = DISKPART(dev);
543 pmask = (1 << part);
544
545 db1_printf(("Opening raid device number: %d partition: %d\n",
546 unit, part));
547
548
549 if ((rs->sc_flags & RAIDF_INITED) &&
550 (rs->sc_dkdev.dk_openmask == 0))
551 raidgetdisklabel(dev);
552
553 /* make sure that this partition exists */
554
555 if (part != RAW_PART) {
556 db1_printf(("Not a raw partition..\n"));
557 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
558 ((part >= lp->d_npartitions) ||
559 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
560 error = ENXIO;
561 raidunlock(rs);
562 db1_printf(("Bailing out...\n"));
563 return (error);
564 }
565 }
566 /* Prevent this unit from being unconfigured while open. */
567 switch (fmt) {
568 case S_IFCHR:
569 rs->sc_dkdev.dk_copenmask |= pmask;
570 break;
571
572 case S_IFBLK:
573 rs->sc_dkdev.dk_bopenmask |= pmask;
574 break;
575 }
576
577 if ((rs->sc_dkdev.dk_openmask == 0) &&
578 ((rs->sc_flags & RAIDF_INITED) != 0)) {
579 /* First one... mark things as dirty... Note that we *MUST*
580 have done a configure before this. I DO NOT WANT TO BE
581 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
582 THAT THEY BELONG TOGETHER!!!!! */
583 /* XXX should check to see if we're only open for reading
584 here... If so, we needn't do this, but then need some
585 other way of keeping track of what's happened.. */
586
587 rf_markalldirty( raidPtrs[unit] );
588 }
589
590
591 rs->sc_dkdev.dk_openmask =
592 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
593
594 raidunlock(rs);
595
596 return (error);
597
598
599 }
600 /* ARGSUSED */
601 int
602 raidclose(dev, flags, fmt, p)
603 dev_t dev;
604 int flags, fmt;
605 struct proc *p;
606 {
607 int unit = raidunit(dev);
608 struct raid_softc *rs;
609 int error = 0;
610 int part;
611
612 if (unit >= numraid)
613 return (ENXIO);
614 rs = &raid_softc[unit];
615
616 if ((error = raidlock(rs)) != 0)
617 return (error);
618
619 part = DISKPART(dev);
620
621 /* ...that much closer to allowing unconfiguration... */
622 switch (fmt) {
623 case S_IFCHR:
624 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
625 break;
626
627 case S_IFBLK:
628 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
629 break;
630 }
631 rs->sc_dkdev.dk_openmask =
632 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
633
634 if ((rs->sc_dkdev.dk_openmask == 0) &&
635 ((rs->sc_flags & RAIDF_INITED) != 0)) {
636 /* Last one... device is not unconfigured yet.
637 Device shutdown has taken care of setting the
638 clean bits if RAIDF_INITED is not set
639 mark things as clean... */
640 #if 0
641 printf("Last one on raid%d. Updating status.\n",unit);
642 #endif
643 rf_update_component_labels(raidPtrs[unit],
644 RF_FINAL_COMPONENT_UPDATE);
645 if (doing_shutdown) {
646 /* last one, and we're going down, so
647 lights out for this RAID set too. */
648 error = rf_Shutdown(raidPtrs[unit]);
649
650 /* It's no longer initialized... */
651 rs->sc_flags &= ~RAIDF_INITED;
652
653 /* Detach the disk. */
654 disk_detach(&rs->sc_dkdev);
655 }
656 }
657
658 raidunlock(rs);
659 return (0);
660
661 }
662
663 void
664 raidstrategy(bp)
665 struct buf *bp;
666 {
667 int s;
668
669 unsigned int raidID = raidunit(bp->b_dev);
670 RF_Raid_t *raidPtr;
671 struct raid_softc *rs = &raid_softc[raidID];
672 struct disklabel *lp;
673 int wlabel;
674
675 if ((rs->sc_flags & RAIDF_INITED) ==0) {
676 bp->b_error = ENXIO;
677 bp->b_flags |= B_ERROR;
678 bp->b_resid = bp->b_bcount;
679 biodone(bp);
680 return;
681 }
682 if (raidID >= numraid || !raidPtrs[raidID]) {
683 bp->b_error = ENODEV;
684 bp->b_flags |= B_ERROR;
685 bp->b_resid = bp->b_bcount;
686 biodone(bp);
687 return;
688 }
689 raidPtr = raidPtrs[raidID];
690 if (!raidPtr->valid) {
691 bp->b_error = ENODEV;
692 bp->b_flags |= B_ERROR;
693 bp->b_resid = bp->b_bcount;
694 biodone(bp);
695 return;
696 }
697 if (bp->b_bcount == 0) {
698 db1_printf(("b_bcount is zero..\n"));
699 biodone(bp);
700 return;
701 }
702 lp = rs->sc_dkdev.dk_label;
703
704 /*
705 * Do bounds checking and adjust transfer. If there's an
706 * error, the bounds check will flag that for us.
707 */
708
709 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
710 if (DISKPART(bp->b_dev) != RAW_PART)
711 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
712 db1_printf(("Bounds check failed!!:%d %d\n",
713 (int) bp->b_blkno, (int) wlabel));
714 biodone(bp);
715 return;
716 }
717 s = splbio();
718
719 bp->b_resid = 0;
720
721 /* stuff it onto our queue */
722 BUFQ_PUT(&rs->buf_queue, bp);
723
724 raidstart(raidPtrs[raidID]);
725
726 splx(s);
727 }
728 /* ARGSUSED */
729 int
730 raidread(dev, uio, flags)
731 dev_t dev;
732 struct uio *uio;
733 int flags;
734 {
735 int unit = raidunit(dev);
736 struct raid_softc *rs;
737 int part;
738
739 if (unit >= numraid)
740 return (ENXIO);
741 rs = &raid_softc[unit];
742
743 if ((rs->sc_flags & RAIDF_INITED) == 0)
744 return (ENXIO);
745 part = DISKPART(dev);
746
747 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
748
749 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
750
751 }
752 /* ARGSUSED */
753 int
754 raidwrite(dev, uio, flags)
755 dev_t dev;
756 struct uio *uio;
757 int flags;
758 {
759 int unit = raidunit(dev);
760 struct raid_softc *rs;
761
762 if (unit >= numraid)
763 return (ENXIO);
764 rs = &raid_softc[unit];
765
766 if ((rs->sc_flags & RAIDF_INITED) == 0)
767 return (ENXIO);
768 db1_printf(("raidwrite\n"));
769 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
770
771 }
772
773 int
774 raidioctl(dev, cmd, data, flag, p)
775 dev_t dev;
776 u_long cmd;
777 caddr_t data;
778 int flag;
779 struct proc *p;
780 {
781 int unit = raidunit(dev);
782 int error = 0;
783 int part, pmask;
784 struct raid_softc *rs;
785 RF_Config_t *k_cfg, *u_cfg;
786 RF_Raid_t *raidPtr;
787 RF_RaidDisk_t *diskPtr;
788 RF_AccTotals_t *totals;
789 RF_DeviceConfig_t *d_cfg, **ucfgp;
790 u_char *specific_buf;
791 int retcode = 0;
792 int row;
793 int column;
794 int raidid;
795 struct rf_recon_req *rrcopy, *rr;
796 RF_ComponentLabel_t *clabel;
797 RF_ComponentLabel_t ci_label;
798 RF_ComponentLabel_t **clabel_ptr;
799 RF_SingleComponent_t *sparePtr,*componentPtr;
800 RF_SingleComponent_t hot_spare;
801 RF_SingleComponent_t component;
802 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
803 int i, j, d;
804 #ifdef __HAVE_OLD_DISKLABEL
805 struct disklabel newlabel;
806 #endif
807
808 if (unit >= numraid)
809 return (ENXIO);
810 rs = &raid_softc[unit];
811 raidPtr = raidPtrs[unit];
812
813 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
814 (int) DISKPART(dev), (int) unit, (int) cmd));
815
816 /* Must be open for writes for these commands... */
817 switch (cmd) {
818 case DIOCSDINFO:
819 case DIOCWDINFO:
820 #ifdef __HAVE_OLD_DISKLABEL
821 case ODIOCWDINFO:
822 case ODIOCSDINFO:
823 #endif
824 case DIOCWLABEL:
825 if ((flag & FWRITE) == 0)
826 return (EBADF);
827 }
828
829 /* Must be initialized for these... */
830 switch (cmd) {
831 case DIOCGDINFO:
832 case DIOCSDINFO:
833 case DIOCWDINFO:
834 #ifdef __HAVE_OLD_DISKLABEL
835 case ODIOCGDINFO:
836 case ODIOCWDINFO:
837 case ODIOCSDINFO:
838 case ODIOCGDEFLABEL:
839 #endif
840 case DIOCGPART:
841 case DIOCWLABEL:
842 case DIOCGDEFLABEL:
843 case RAIDFRAME_SHUTDOWN:
844 case RAIDFRAME_REWRITEPARITY:
845 case RAIDFRAME_GET_INFO:
846 case RAIDFRAME_RESET_ACCTOTALS:
847 case RAIDFRAME_GET_ACCTOTALS:
848 case RAIDFRAME_KEEP_ACCTOTALS:
849 case RAIDFRAME_GET_SIZE:
850 case RAIDFRAME_FAIL_DISK:
851 case RAIDFRAME_COPYBACK:
852 case RAIDFRAME_CHECK_RECON_STATUS:
853 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
854 case RAIDFRAME_GET_COMPONENT_LABEL:
855 case RAIDFRAME_SET_COMPONENT_LABEL:
856 case RAIDFRAME_ADD_HOT_SPARE:
857 case RAIDFRAME_REMOVE_HOT_SPARE:
858 case RAIDFRAME_INIT_LABELS:
859 case RAIDFRAME_REBUILD_IN_PLACE:
860 case RAIDFRAME_CHECK_PARITY:
861 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
862 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
863 case RAIDFRAME_CHECK_COPYBACK_STATUS:
864 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
865 case RAIDFRAME_SET_AUTOCONFIG:
866 case RAIDFRAME_SET_ROOT:
867 case RAIDFRAME_DELETE_COMPONENT:
868 case RAIDFRAME_INCORPORATE_HOT_SPARE:
869 if ((rs->sc_flags & RAIDF_INITED) == 0)
870 return (ENXIO);
871 }
872
873 switch (cmd) {
874
875 /* configure the system */
876 case RAIDFRAME_CONFIGURE:
877
878 if (raidPtr->valid) {
879 /* There is a valid RAID set running on this unit! */
880 printf("raid%d: Device already configured!\n",unit);
881 return(EINVAL);
882 }
883
884 /* copy-in the configuration information */
885 /* data points to a pointer to the configuration structure */
886
887 u_cfg = *((RF_Config_t **) data);
888 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
889 if (k_cfg == NULL) {
890 return (ENOMEM);
891 }
892 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
893 sizeof(RF_Config_t));
894 if (retcode) {
895 RF_Free(k_cfg, sizeof(RF_Config_t));
896 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
897 retcode));
898 return (retcode);
899 }
900 /* allocate a buffer for the layout-specific data, and copy it
901 * in */
902 if (k_cfg->layoutSpecificSize) {
903 if (k_cfg->layoutSpecificSize > 10000) {
904 /* sanity check */
905 RF_Free(k_cfg, sizeof(RF_Config_t));
906 return (EINVAL);
907 }
908 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
909 (u_char *));
910 if (specific_buf == NULL) {
911 RF_Free(k_cfg, sizeof(RF_Config_t));
912 return (ENOMEM);
913 }
914 retcode = copyin(k_cfg->layoutSpecific,
915 (caddr_t) specific_buf,
916 k_cfg->layoutSpecificSize);
917 if (retcode) {
918 RF_Free(k_cfg, sizeof(RF_Config_t));
919 RF_Free(specific_buf,
920 k_cfg->layoutSpecificSize);
921 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
922 retcode));
923 return (retcode);
924 }
925 } else
926 specific_buf = NULL;
927 k_cfg->layoutSpecific = specific_buf;
928
929 /* should do some kind of sanity check on the configuration.
930 * Store the sum of all the bytes in the last byte? */
931
932 /* configure the system */
933
934 /*
935 * Clear the entire RAID descriptor, just to make sure
936 * there is no stale data left in the case of a
937 * reconfiguration
938 */
939 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
940 raidPtr->raidid = unit;
941
942 retcode = rf_Configure(raidPtr, k_cfg, NULL);
943
944 if (retcode == 0) {
945
946 /* allow this many simultaneous IO's to
947 this RAID device */
948 raidPtr->openings = RAIDOUTSTANDING;
949
950 raidinit(raidPtr);
951 rf_markalldirty(raidPtr);
952 }
953 /* free the buffers. No return code here. */
954 if (k_cfg->layoutSpecificSize) {
955 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
956 }
957 RF_Free(k_cfg, sizeof(RF_Config_t));
958
959 return (retcode);
960
961 /* shutdown the system */
962 case RAIDFRAME_SHUTDOWN:
963
964 if ((error = raidlock(rs)) != 0)
965 return (error);
966
967 /*
968 * If somebody has a partition mounted, we shouldn't
969 * shutdown.
970 */
971
972 part = DISKPART(dev);
973 pmask = (1 << part);
974 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
975 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
976 (rs->sc_dkdev.dk_copenmask & pmask))) {
977 raidunlock(rs);
978 return (EBUSY);
979 }
980
981 retcode = rf_Shutdown(raidPtr);
982
983 /* It's no longer initialized... */
984 rs->sc_flags &= ~RAIDF_INITED;
985
986 /* Detach the disk. */
987 disk_detach(&rs->sc_dkdev);
988
989 raidunlock(rs);
990
991 return (retcode);
992 case RAIDFRAME_GET_COMPONENT_LABEL:
993 clabel_ptr = (RF_ComponentLabel_t **) data;
994 /* need to read the component label for the disk indicated
995 by row,column in clabel */
996
997 /* For practice, let's get it directly fromdisk, rather
998 than from the in-core copy */
999 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1000 (RF_ComponentLabel_t *));
1001 if (clabel == NULL)
1002 return (ENOMEM);
1003
1004 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1005
1006 retcode = copyin( *clabel_ptr, clabel,
1007 sizeof(RF_ComponentLabel_t));
1008
1009 if (retcode) {
1010 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1011 return(retcode);
1012 }
1013
1014 row = clabel->row;
1015 column = clabel->column;
1016
1017 if ((row < 0) || (row >= raidPtr->numRow) ||
1018 (column < 0) || (column >= raidPtr->numCol +
1019 raidPtr->numSpare)) {
1020 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1021 return(EINVAL);
1022 }
1023
1024 raidread_component_label(raidPtr->Disks[row][column].dev,
1025 raidPtr->raid_cinfo[row][column].ci_vp,
1026 clabel );
1027
1028 retcode = copyout((caddr_t) clabel,
1029 (caddr_t) *clabel_ptr,
1030 sizeof(RF_ComponentLabel_t));
1031 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1032 return (retcode);
1033
1034 case RAIDFRAME_SET_COMPONENT_LABEL:
1035 clabel = (RF_ComponentLabel_t *) data;
1036
1037 /* XXX check the label for valid stuff... */
1038 /* Note that some things *should not* get modified --
1039 the user should be re-initing the labels instead of
1040 trying to patch things.
1041 */
1042
1043 raidid = raidPtr->raidid;
1044 printf("raid%d: Got component label:\n", raidid);
1045 printf("raid%d: Version: %d\n", raidid, clabel->version);
1046 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1047 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1048 printf("raid%d: Row: %d\n", raidid, clabel->row);
1049 printf("raid%d: Column: %d\n", raidid, clabel->column);
1050 printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
1051 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1052 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1053 printf("raid%d: Status: %d\n", raidid, clabel->status);
1054
1055 row = clabel->row;
1056 column = clabel->column;
1057
1058 if ((row < 0) || (row >= raidPtr->numRow) ||
1059 (column < 0) || (column >= raidPtr->numCol)) {
1060 return(EINVAL);
1061 }
1062
1063 /* XXX this isn't allowed to do anything for now :-) */
1064
1065 /* XXX and before it is, we need to fill in the rest
1066 of the fields!?!?!?! */
1067 #if 0
1068 raidwrite_component_label(
1069 raidPtr->Disks[row][column].dev,
1070 raidPtr->raid_cinfo[row][column].ci_vp,
1071 clabel );
1072 #endif
1073 return (0);
1074
1075 case RAIDFRAME_INIT_LABELS:
1076 clabel = (RF_ComponentLabel_t *) data;
1077 /*
1078 we only want the serial number from
1079 the above. We get all the rest of the information
1080 from the config that was used to create this RAID
1081 set.
1082 */
1083
1084 raidPtr->serial_number = clabel->serial_number;
1085
1086 raid_init_component_label(raidPtr, &ci_label);
1087 ci_label.serial_number = clabel->serial_number;
1088
1089 for(row=0;row<raidPtr->numRow;row++) {
1090 ci_label.row = row;
1091 for(column=0;column<raidPtr->numCol;column++) {
1092 diskPtr = &raidPtr->Disks[row][column];
1093 if (!RF_DEAD_DISK(diskPtr->status)) {
1094 ci_label.partitionSize = diskPtr->partitionSize;
1095 ci_label.column = column;
1096 raidwrite_component_label(
1097 raidPtr->Disks[row][column].dev,
1098 raidPtr->raid_cinfo[row][column].ci_vp,
1099 &ci_label );
1100 }
1101 }
1102 }
1103
1104 return (retcode);
1105 case RAIDFRAME_SET_AUTOCONFIG:
1106 d = rf_set_autoconfig(raidPtr, *(int *) data);
1107 printf("raid%d: New autoconfig value is: %d\n",
1108 raidPtr->raidid, d);
1109 *(int *) data = d;
1110 return (retcode);
1111
1112 case RAIDFRAME_SET_ROOT:
1113 d = rf_set_rootpartition(raidPtr, *(int *) data);
1114 printf("raid%d: New rootpartition value is: %d\n",
1115 raidPtr->raidid, d);
1116 *(int *) data = d;
1117 return (retcode);
1118
1119 /* initialize all parity */
1120 case RAIDFRAME_REWRITEPARITY:
1121
1122 if (raidPtr->Layout.map->faultsTolerated == 0) {
1123 /* Parity for RAID 0 is trivially correct */
1124 raidPtr->parity_good = RF_RAID_CLEAN;
1125 return(0);
1126 }
1127
1128 if (raidPtr->parity_rewrite_in_progress == 1) {
1129 /* Re-write is already in progress! */
1130 return(EINVAL);
1131 }
1132
1133 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1134 rf_RewriteParityThread,
1135 raidPtr,"raid_parity");
1136 return (retcode);
1137
1138
1139 case RAIDFRAME_ADD_HOT_SPARE:
1140 sparePtr = (RF_SingleComponent_t *) data;
1141 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1142 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1143 return(retcode);
1144
1145 case RAIDFRAME_REMOVE_HOT_SPARE:
1146 return(retcode);
1147
1148 case RAIDFRAME_DELETE_COMPONENT:
1149 componentPtr = (RF_SingleComponent_t *)data;
1150 memcpy( &component, componentPtr,
1151 sizeof(RF_SingleComponent_t));
1152 retcode = rf_delete_component(raidPtr, &component);
1153 return(retcode);
1154
1155 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1156 componentPtr = (RF_SingleComponent_t *)data;
1157 memcpy( &component, componentPtr,
1158 sizeof(RF_SingleComponent_t));
1159 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1160 return(retcode);
1161
1162 case RAIDFRAME_REBUILD_IN_PLACE:
1163
1164 if (raidPtr->Layout.map->faultsTolerated == 0) {
1165 /* Can't do this on a RAID 0!! */
1166 return(EINVAL);
1167 }
1168
1169 if (raidPtr->recon_in_progress == 1) {
1170 /* a reconstruct is already in progress! */
1171 return(EINVAL);
1172 }
1173
1174 componentPtr = (RF_SingleComponent_t *) data;
1175 memcpy( &component, componentPtr,
1176 sizeof(RF_SingleComponent_t));
1177 row = component.row;
1178 column = component.column;
1179 printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
1180 row, column);
1181 if ((row < 0) || (row >= raidPtr->numRow) ||
1182 (column < 0) || (column >= raidPtr->numCol)) {
1183 return(EINVAL);
1184 }
1185
1186 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1187 if (rrcopy == NULL)
1188 return(ENOMEM);
1189
1190 rrcopy->raidPtr = (void *) raidPtr;
1191 rrcopy->row = row;
1192 rrcopy->col = column;
1193
1194 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1195 rf_ReconstructInPlaceThread,
1196 rrcopy,"raid_reconip");
1197 return(retcode);
1198
1199 case RAIDFRAME_GET_INFO:
1200 if (!raidPtr->valid)
1201 return (ENODEV);
1202 ucfgp = (RF_DeviceConfig_t **) data;
1203 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1204 (RF_DeviceConfig_t *));
1205 if (d_cfg == NULL)
1206 return (ENOMEM);
1207 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1208 d_cfg->rows = raidPtr->numRow;
1209 d_cfg->cols = raidPtr->numCol;
1210 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1211 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1212 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1213 return (ENOMEM);
1214 }
1215 d_cfg->nspares = raidPtr->numSpare;
1216 if (d_cfg->nspares >= RF_MAX_DISKS) {
1217 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1218 return (ENOMEM);
1219 }
1220 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1221 d = 0;
1222 for (i = 0; i < d_cfg->rows; i++) {
1223 for (j = 0; j < d_cfg->cols; j++) {
1224 d_cfg->devs[d] = raidPtr->Disks[i][j];
1225 d++;
1226 }
1227 }
1228 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1229 d_cfg->spares[i] = raidPtr->Disks[0][j];
1230 }
1231 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1232 sizeof(RF_DeviceConfig_t));
1233 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1234
1235 return (retcode);
1236
1237 case RAIDFRAME_CHECK_PARITY:
1238 *(int *) data = raidPtr->parity_good;
1239 return (0);
1240
1241 case RAIDFRAME_RESET_ACCTOTALS:
1242 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1243 return (0);
1244
1245 case RAIDFRAME_GET_ACCTOTALS:
1246 totals = (RF_AccTotals_t *) data;
1247 *totals = raidPtr->acc_totals;
1248 return (0);
1249
1250 case RAIDFRAME_KEEP_ACCTOTALS:
1251 raidPtr->keep_acc_totals = *(int *)data;
1252 return (0);
1253
1254 case RAIDFRAME_GET_SIZE:
1255 *(int *) data = raidPtr->totalSectors;
1256 return (0);
1257
1258 /* fail a disk & optionally start reconstruction */
1259 case RAIDFRAME_FAIL_DISK:
1260
1261 if (raidPtr->Layout.map->faultsTolerated == 0) {
1262 /* Can't do this on a RAID 0!! */
1263 return(EINVAL);
1264 }
1265
1266 rr = (struct rf_recon_req *) data;
1267
1268 if (rr->row < 0 || rr->row >= raidPtr->numRow
1269 || rr->col < 0 || rr->col >= raidPtr->numCol)
1270 return (EINVAL);
1271
1272 printf("raid%d: Failing the disk: row: %d col: %d\n",
1273 unit, rr->row, rr->col);
1274
1275 /* make a copy of the recon request so that we don't rely on
1276 * the user's buffer */
1277 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1278 if (rrcopy == NULL)
1279 return(ENOMEM);
1280 memcpy(rrcopy, rr, sizeof(*rr));
1281 rrcopy->raidPtr = (void *) raidPtr;
1282
1283 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1284 rf_ReconThread,
1285 rrcopy,"raid_recon");
1286 return (0);
1287
1288 /* invoke a copyback operation after recon on whatever disk
1289 * needs it, if any */
1290 case RAIDFRAME_COPYBACK:
1291
1292 if (raidPtr->Layout.map->faultsTolerated == 0) {
1293 /* This makes no sense on a RAID 0!! */
1294 return(EINVAL);
1295 }
1296
1297 if (raidPtr->copyback_in_progress == 1) {
1298 /* Copyback is already in progress! */
1299 return(EINVAL);
1300 }
1301
1302 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1303 rf_CopybackThread,
1304 raidPtr,"raid_copyback");
1305 return (retcode);
1306
1307 /* return the percentage completion of reconstruction */
1308 case RAIDFRAME_CHECK_RECON_STATUS:
1309 if (raidPtr->Layout.map->faultsTolerated == 0) {
1310 /* This makes no sense on a RAID 0, so tell the
1311 user it's done. */
1312 *(int *) data = 100;
1313 return(0);
1314 }
1315 row = 0; /* XXX we only consider a single row... */
1316 if (raidPtr->status[row] != rf_rs_reconstructing)
1317 *(int *) data = 100;
1318 else
1319 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1320 return (0);
1321 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1322 progressInfoPtr = (RF_ProgressInfo_t **) data;
1323 row = 0; /* XXX we only consider a single row... */
1324 if (raidPtr->status[row] != rf_rs_reconstructing) {
1325 progressInfo.remaining = 0;
1326 progressInfo.completed = 100;
1327 progressInfo.total = 100;
1328 } else {
1329 progressInfo.total =
1330 raidPtr->reconControl[row]->numRUsTotal;
1331 progressInfo.completed =
1332 raidPtr->reconControl[row]->numRUsComplete;
1333 progressInfo.remaining = progressInfo.total -
1334 progressInfo.completed;
1335 }
1336 retcode = copyout((caddr_t) &progressInfo,
1337 (caddr_t) *progressInfoPtr,
1338 sizeof(RF_ProgressInfo_t));
1339 return (retcode);
1340
1341 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1342 if (raidPtr->Layout.map->faultsTolerated == 0) {
1343 /* This makes no sense on a RAID 0, so tell the
1344 user it's done. */
1345 *(int *) data = 100;
1346 return(0);
1347 }
1348 if (raidPtr->parity_rewrite_in_progress == 1) {
1349 *(int *) data = 100 *
1350 raidPtr->parity_rewrite_stripes_done /
1351 raidPtr->Layout.numStripe;
1352 } else {
1353 *(int *) data = 100;
1354 }
1355 return (0);
1356
1357 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1358 progressInfoPtr = (RF_ProgressInfo_t **) data;
1359 if (raidPtr->parity_rewrite_in_progress == 1) {
1360 progressInfo.total = raidPtr->Layout.numStripe;
1361 progressInfo.completed =
1362 raidPtr->parity_rewrite_stripes_done;
1363 progressInfo.remaining = progressInfo.total -
1364 progressInfo.completed;
1365 } else {
1366 progressInfo.remaining = 0;
1367 progressInfo.completed = 100;
1368 progressInfo.total = 100;
1369 }
1370 retcode = copyout((caddr_t) &progressInfo,
1371 (caddr_t) *progressInfoPtr,
1372 sizeof(RF_ProgressInfo_t));
1373 return (retcode);
1374
1375 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1376 if (raidPtr->Layout.map->faultsTolerated == 0) {
1377 /* This makes no sense on a RAID 0 */
1378 *(int *) data = 100;
1379 return(0);
1380 }
1381 if (raidPtr->copyback_in_progress == 1) {
1382 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1383 raidPtr->Layout.numStripe;
1384 } else {
1385 *(int *) data = 100;
1386 }
1387 return (0);
1388
1389 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1390 progressInfoPtr = (RF_ProgressInfo_t **) data;
1391 if (raidPtr->copyback_in_progress == 1) {
1392 progressInfo.total = raidPtr->Layout.numStripe;
1393 progressInfo.completed =
1394 raidPtr->copyback_stripes_done;
1395 progressInfo.remaining = progressInfo.total -
1396 progressInfo.completed;
1397 } else {
1398 progressInfo.remaining = 0;
1399 progressInfo.completed = 100;
1400 progressInfo.total = 100;
1401 }
1402 retcode = copyout((caddr_t) &progressInfo,
1403 (caddr_t) *progressInfoPtr,
1404 sizeof(RF_ProgressInfo_t));
1405 return (retcode);
1406
1407 /* the sparetable daemon calls this to wait for the kernel to
1408 * need a spare table. this ioctl does not return until a
1409 * spare table is needed. XXX -- calling mpsleep here in the
1410 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1411 * -- I should either compute the spare table in the kernel,
1412 * or have a different -- XXX XXX -- interface (a different
1413 * character device) for delivering the table -- XXX */
1414 #if 0
1415 case RAIDFRAME_SPARET_WAIT:
1416 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1417 while (!rf_sparet_wait_queue)
1418 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1419 waitreq = rf_sparet_wait_queue;
1420 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1421 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1422
1423 /* structure assignment */
1424 *((RF_SparetWait_t *) data) = *waitreq;
1425
1426 RF_Free(waitreq, sizeof(*waitreq));
1427 return (0);
1428
1429 /* wakes up a process waiting on SPARET_WAIT and puts an error
1430 * code in it that will cause the dameon to exit */
1431 case RAIDFRAME_ABORT_SPARET_WAIT:
1432 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1433 waitreq->fcol = -1;
1434 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1435 waitreq->next = rf_sparet_wait_queue;
1436 rf_sparet_wait_queue = waitreq;
1437 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1438 wakeup(&rf_sparet_wait_queue);
1439 return (0);
1440
1441 /* used by the spare table daemon to deliver a spare table
1442 * into the kernel */
1443 case RAIDFRAME_SEND_SPARET:
1444
1445 /* install the spare table */
1446 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1447
1448 /* respond to the requestor. the return status of the spare
1449 * table installation is passed in the "fcol" field */
1450 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1451 waitreq->fcol = retcode;
1452 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1453 waitreq->next = rf_sparet_resp_queue;
1454 rf_sparet_resp_queue = waitreq;
1455 wakeup(&rf_sparet_resp_queue);
1456 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1457
1458 return (retcode);
1459 #endif
1460
1461 default:
1462 break; /* fall through to the os-specific code below */
1463
1464 }
1465
1466 if (!raidPtr->valid)
1467 return (EINVAL);
1468
1469 /*
1470 * Add support for "regular" device ioctls here.
1471 */
1472
1473 switch (cmd) {
1474 case DIOCGDINFO:
1475 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1476 break;
1477 #ifdef __HAVE_OLD_DISKLABEL
1478 case ODIOCGDINFO:
1479 newlabel = *(rs->sc_dkdev.dk_label);
1480 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1481 return ENOTTY;
1482 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1483 break;
1484 #endif
1485
1486 case DIOCGPART:
1487 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1488 ((struct partinfo *) data)->part =
1489 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1490 break;
1491
1492 case DIOCWDINFO:
1493 case DIOCSDINFO:
1494 #ifdef __HAVE_OLD_DISKLABEL
1495 case ODIOCWDINFO:
1496 case ODIOCSDINFO:
1497 #endif
1498 {
1499 struct disklabel *lp;
1500 #ifdef __HAVE_OLD_DISKLABEL
1501 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1502 memset(&newlabel, 0, sizeof newlabel);
1503 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1504 lp = &newlabel;
1505 } else
1506 #endif
1507 lp = (struct disklabel *)data;
1508
1509 if ((error = raidlock(rs)) != 0)
1510 return (error);
1511
1512 rs->sc_flags |= RAIDF_LABELLING;
1513
1514 error = setdisklabel(rs->sc_dkdev.dk_label,
1515 lp, 0, rs->sc_dkdev.dk_cpulabel);
1516 if (error == 0) {
1517 if (cmd == DIOCWDINFO
1518 #ifdef __HAVE_OLD_DISKLABEL
1519 || cmd == ODIOCWDINFO
1520 #endif
1521 )
1522 error = writedisklabel(RAIDLABELDEV(dev),
1523 raidstrategy, rs->sc_dkdev.dk_label,
1524 rs->sc_dkdev.dk_cpulabel);
1525 }
1526 rs->sc_flags &= ~RAIDF_LABELLING;
1527
1528 raidunlock(rs);
1529
1530 if (error)
1531 return (error);
1532 break;
1533 }
1534
1535 case DIOCWLABEL:
1536 if (*(int *) data != 0)
1537 rs->sc_flags |= RAIDF_WLABEL;
1538 else
1539 rs->sc_flags &= ~RAIDF_WLABEL;
1540 break;
1541
1542 case DIOCGDEFLABEL:
1543 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1544 break;
1545
1546 #ifdef __HAVE_OLD_DISKLABEL
1547 case ODIOCGDEFLABEL:
1548 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1549 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1550 return ENOTTY;
1551 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1552 break;
1553 #endif
1554
1555 default:
1556 retcode = ENOTTY;
1557 }
1558 return (retcode);
1559
1560 }
1561
1562
1563 /* raidinit -- complete the rest of the initialization for the
1564 RAIDframe device. */
1565
1566
1567 static void
1568 raidinit(raidPtr)
1569 RF_Raid_t *raidPtr;
1570 {
1571 struct raid_softc *rs;
1572 int unit;
1573
1574 unit = raidPtr->raidid;
1575
1576 rs = &raid_softc[unit];
1577
1578 /* XXX should check return code first... */
1579 rs->sc_flags |= RAIDF_INITED;
1580
1581 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1582
1583 rs->sc_dkdev.dk_name = rs->sc_xname;
1584
1585 /* disk_attach actually creates space for the CPU disklabel, among
1586 * other things, so it's critical to call this *BEFORE* we try putzing
1587 * with disklabels. */
1588
1589 disk_attach(&rs->sc_dkdev);
1590
1591 /* XXX There may be a weird interaction here between this, and
1592 * protectedSectors, as used in RAIDframe. */
1593
1594 rs->sc_size = raidPtr->totalSectors;
1595
1596 }
1597
1598 /* wake up the daemon & tell it to get us a spare table
1599 * XXX
1600 * the entries in the queues should be tagged with the raidPtr
1601 * so that in the extremely rare case that two recons happen at once,
1602 * we know for which device were requesting a spare table
1603 * XXX
1604 *
1605 * XXX This code is not currently used. GO
1606 */
1607 int
1608 rf_GetSpareTableFromDaemon(req)
1609 RF_SparetWait_t *req;
1610 {
1611 int retcode;
1612
1613 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1614 req->next = rf_sparet_wait_queue;
1615 rf_sparet_wait_queue = req;
1616 wakeup(&rf_sparet_wait_queue);
1617
1618 /* mpsleep unlocks the mutex */
1619 while (!rf_sparet_resp_queue) {
1620 tsleep(&rf_sparet_resp_queue, PRIBIO,
1621 "raidframe getsparetable", 0);
1622 }
1623 req = rf_sparet_resp_queue;
1624 rf_sparet_resp_queue = req->next;
1625 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1626
1627 retcode = req->fcol;
1628 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1629 * alloc'd */
1630 return (retcode);
1631 }
1632
1633 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1634 * bp & passes it down.
1635 * any calls originating in the kernel must use non-blocking I/O
1636 * do some extra sanity checking to return "appropriate" error values for
1637 * certain conditions (to make some standard utilities work)
1638 *
1639 * Formerly known as: rf_DoAccessKernel
1640 */
1641 void
1642 raidstart(raidPtr)
1643 RF_Raid_t *raidPtr;
1644 {
1645 RF_SectorCount_t num_blocks, pb, sum;
1646 RF_RaidAddr_t raid_addr;
1647 int retcode;
1648 struct partition *pp;
1649 daddr_t blocknum;
1650 int unit;
1651 struct raid_softc *rs;
1652 int do_async;
1653 struct buf *bp;
1654
1655 unit = raidPtr->raidid;
1656 rs = &raid_softc[unit];
1657
1658 /* quick check to see if anything has died recently */
1659 RF_LOCK_MUTEX(raidPtr->mutex);
1660 if (raidPtr->numNewFailures > 0) {
1661 rf_update_component_labels(raidPtr,
1662 RF_NORMAL_COMPONENT_UPDATE);
1663 raidPtr->numNewFailures--;
1664 }
1665
1666 /* Check to see if we're at the limit... */
1667 while (raidPtr->openings > 0) {
1668 RF_UNLOCK_MUTEX(raidPtr->mutex);
1669
1670 /* get the next item, if any, from the queue */
1671 if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
1672 /* nothing more to do */
1673 return;
1674 }
1675
1676 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1677 * partition.. Need to make it absolute to the underlying
1678 * device.. */
1679
1680 blocknum = bp->b_blkno;
1681 if (DISKPART(bp->b_dev) != RAW_PART) {
1682 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1683 blocknum += pp->p_offset;
1684 }
1685
1686 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1687 (int) blocknum));
1688
1689 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1690 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1691
1692 /* *THIS* is where we adjust what block we're going to...
1693 * but DO NOT TOUCH bp->b_blkno!!! */
1694 raid_addr = blocknum;
1695
1696 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1697 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1698 sum = raid_addr + num_blocks + pb;
1699 if (1 || rf_debugKernelAccess) {
1700 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1701 (int) raid_addr, (int) sum, (int) num_blocks,
1702 (int) pb, (int) bp->b_resid));
1703 }
1704 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1705 || (sum < num_blocks) || (sum < pb)) {
1706 bp->b_error = ENOSPC;
1707 bp->b_flags |= B_ERROR;
1708 bp->b_resid = bp->b_bcount;
1709 biodone(bp);
1710 RF_LOCK_MUTEX(raidPtr->mutex);
1711 continue;
1712 }
1713 /*
1714 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1715 */
1716
1717 if (bp->b_bcount & raidPtr->sectorMask) {
1718 bp->b_error = EINVAL;
1719 bp->b_flags |= B_ERROR;
1720 bp->b_resid = bp->b_bcount;
1721 biodone(bp);
1722 RF_LOCK_MUTEX(raidPtr->mutex);
1723 continue;
1724
1725 }
1726 db1_printf(("Calling DoAccess..\n"));
1727
1728
1729 RF_LOCK_MUTEX(raidPtr->mutex);
1730 raidPtr->openings--;
1731 RF_UNLOCK_MUTEX(raidPtr->mutex);
1732
1733 /*
1734 * Everything is async.
1735 */
1736 do_async = 1;
1737
1738 disk_busy(&rs->sc_dkdev);
1739
1740 /* XXX we're still at splbio() here... do we *really*
1741 need to be? */
1742
1743 /* don't ever condition on bp->b_flags & B_WRITE.
1744 * always condition on B_READ instead */
1745
1746 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1747 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1748 do_async, raid_addr, num_blocks,
1749 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1750
1751 RF_LOCK_MUTEX(raidPtr->mutex);
1752 }
1753 RF_UNLOCK_MUTEX(raidPtr->mutex);
1754 }
1755
1756
1757
1758
1759 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1760
1761 int
1762 rf_DispatchKernelIO(queue, req)
1763 RF_DiskQueue_t *queue;
1764 RF_DiskQueueData_t *req;
1765 {
1766 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1767 struct buf *bp;
1768 struct raidbuf *raidbp = NULL;
1769 struct raid_softc *rs;
1770 int unit;
1771 int s;
1772
1773 s=0;
1774 /* s = splbio();*/ /* want to test this */
1775 /* XXX along with the vnode, we also need the softc associated with
1776 * this device.. */
1777
1778 req->queue = queue;
1779
1780 unit = queue->raidPtr->raidid;
1781
1782 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1783
1784 if (unit >= numraid) {
1785 printf("Invalid unit number: %d %d\n", unit, numraid);
1786 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1787 }
1788 rs = &raid_softc[unit];
1789
1790 bp = req->bp;
1791 #if 1
1792 /* XXX when there is a physical disk failure, someone is passing us a
1793 * buffer that contains old stuff!! Attempt to deal with this problem
1794 * without taking a performance hit... (not sure where the real bug
1795 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1796
1797 if (bp->b_flags & B_ERROR) {
1798 bp->b_flags &= ~B_ERROR;
1799 }
1800 if (bp->b_error != 0) {
1801 bp->b_error = 0;
1802 }
1803 #endif
1804 raidbp = RAIDGETBUF(rs);
1805
1806 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1807
1808 /*
1809 * context for raidiodone
1810 */
1811 raidbp->rf_obp = bp;
1812 raidbp->req = req;
1813
1814 LIST_INIT(&raidbp->rf_buf.b_dep);
1815
1816 switch (req->type) {
1817 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1818 /* XXX need to do something extra here.. */
1819 /* I'm leaving this in, as I've never actually seen it used,
1820 * and I'd like folks to report it... GO */
1821 printf(("WAKEUP CALLED\n"));
1822 queue->numOutstanding++;
1823
1824 /* XXX need to glue the original buffer into this?? */
1825
1826 KernelWakeupFunc(&raidbp->rf_buf);
1827 break;
1828
1829 case RF_IO_TYPE_READ:
1830 case RF_IO_TYPE_WRITE:
1831
1832 if (req->tracerec) {
1833 RF_ETIMER_START(req->tracerec->timer);
1834 }
1835 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1836 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1837 req->sectorOffset, req->numSector,
1838 req->buf, KernelWakeupFunc, (void *) req,
1839 queue->raidPtr->logBytesPerSector, req->b_proc);
1840
1841 if (rf_debugKernelAccess) {
1842 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1843 (long) bp->b_blkno));
1844 }
1845 queue->numOutstanding++;
1846 queue->last_deq_sector = req->sectorOffset;
1847 /* acc wouldn't have been let in if there were any pending
1848 * reqs at any other priority */
1849 queue->curPriority = req->priority;
1850
1851 db1_printf(("Going for %c to unit %d row %d col %d\n",
1852 req->type, unit, queue->row, queue->col));
1853 db1_printf(("sector %d count %d (%d bytes) %d\n",
1854 (int) req->sectorOffset, (int) req->numSector,
1855 (int) (req->numSector <<
1856 queue->raidPtr->logBytesPerSector),
1857 (int) queue->raidPtr->logBytesPerSector));
1858 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1859 raidbp->rf_buf.b_vp->v_numoutput++;
1860 }
1861 VOP_STRATEGY(&raidbp->rf_buf);
1862
1863 break;
1864
1865 default:
1866 panic("bad req->type in rf_DispatchKernelIO");
1867 }
1868 db1_printf(("Exiting from DispatchKernelIO\n"));
1869 /* splx(s); */ /* want to test this */
1870 return (0);
1871 }
1872 /* this is the callback function associated with a I/O invoked from
1873 kernel code.
1874 */
1875 static void
1876 KernelWakeupFunc(vbp)
1877 struct buf *vbp;
1878 {
1879 RF_DiskQueueData_t *req = NULL;
1880 RF_DiskQueue_t *queue;
1881 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1882 struct buf *bp;
1883 struct raid_softc *rs;
1884 int unit;
1885 int s;
1886
1887 s = splbio();
1888 db1_printf(("recovering the request queue:\n"));
1889 req = raidbp->req;
1890
1891 bp = raidbp->rf_obp;
1892
1893 queue = (RF_DiskQueue_t *) req->queue;
1894
1895 if (raidbp->rf_buf.b_flags & B_ERROR) {
1896 bp->b_flags |= B_ERROR;
1897 bp->b_error = raidbp->rf_buf.b_error ?
1898 raidbp->rf_buf.b_error : EIO;
1899 }
1900
1901 /* XXX methinks this could be wrong... */
1902 #if 1
1903 bp->b_resid = raidbp->rf_buf.b_resid;
1904 #endif
1905
1906 if (req->tracerec) {
1907 RF_ETIMER_STOP(req->tracerec->timer);
1908 RF_ETIMER_EVAL(req->tracerec->timer);
1909 RF_LOCK_MUTEX(rf_tracing_mutex);
1910 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1911 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1912 req->tracerec->num_phys_ios++;
1913 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1914 }
1915 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1916
1917 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1918
1919
1920 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1921 * ballistic, and mark the component as hosed... */
1922
1923 if (bp->b_flags & B_ERROR) {
1924 /* Mark the disk as dead */
1925 /* but only mark it once... */
1926 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1927 rf_ds_optimal) {
1928 printf("raid%d: IO Error. Marking %s as failed.\n",
1929 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1930 queue->raidPtr->Disks[queue->row][queue->col].status =
1931 rf_ds_failed;
1932 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1933 queue->raidPtr->numFailures++;
1934 queue->raidPtr->numNewFailures++;
1935 } else { /* Disk is already dead... */
1936 /* printf("Disk already marked as dead!\n"); */
1937 }
1938
1939 }
1940
1941 rs = &raid_softc[unit];
1942 RAIDPUTBUF(rs, raidbp);
1943
1944 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1945 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1946
1947 splx(s);
1948 }
1949
1950
1951
1952 /*
1953 * initialize a buf structure for doing an I/O in the kernel.
1954 */
1955 static void
1956 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1957 logBytesPerSector, b_proc)
1958 struct buf *bp;
1959 struct vnode *b_vp;
1960 unsigned rw_flag;
1961 dev_t dev;
1962 RF_SectorNum_t startSect;
1963 RF_SectorCount_t numSect;
1964 caddr_t buf;
1965 void (*cbFunc) (struct buf *);
1966 void *cbArg;
1967 int logBytesPerSector;
1968 struct proc *b_proc;
1969 {
1970 /* bp->b_flags = B_PHYS | rw_flag; */
1971 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1972 bp->b_bcount = numSect << logBytesPerSector;
1973 bp->b_bufsize = bp->b_bcount;
1974 bp->b_error = 0;
1975 bp->b_dev = dev;
1976 bp->b_data = buf;
1977 bp->b_blkno = startSect;
1978 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1979 if (bp->b_bcount == 0) {
1980 panic("bp->b_bcount is zero in InitBP!!\n");
1981 }
1982 bp->b_proc = b_proc;
1983 bp->b_iodone = cbFunc;
1984 bp->b_vp = b_vp;
1985
1986 }
1987
1988 static void
1989 raidgetdefaultlabel(raidPtr, rs, lp)
1990 RF_Raid_t *raidPtr;
1991 struct raid_softc *rs;
1992 struct disklabel *lp;
1993 {
1994 db1_printf(("Building a default label...\n"));
1995 memset(lp, 0, sizeof(*lp));
1996
1997 /* fabricate a label... */
1998 lp->d_secperunit = raidPtr->totalSectors;
1999 lp->d_secsize = raidPtr->bytesPerSector;
2000 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2001 lp->d_ntracks = 4 * raidPtr->numCol;
2002 lp->d_ncylinders = raidPtr->totalSectors /
2003 (lp->d_nsectors * lp->d_ntracks);
2004 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2005
2006 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2007 lp->d_type = DTYPE_RAID;
2008 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2009 lp->d_rpm = 3600;
2010 lp->d_interleave = 1;
2011 lp->d_flags = 0;
2012
2013 lp->d_partitions[RAW_PART].p_offset = 0;
2014 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2015 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2016 lp->d_npartitions = RAW_PART + 1;
2017
2018 lp->d_magic = DISKMAGIC;
2019 lp->d_magic2 = DISKMAGIC;
2020 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2021
2022 }
2023 /*
2024 * Read the disklabel from the raid device. If one is not present, fake one
2025 * up.
2026 */
2027 static void
2028 raidgetdisklabel(dev)
2029 dev_t dev;
2030 {
2031 int unit = raidunit(dev);
2032 struct raid_softc *rs = &raid_softc[unit];
2033 char *errstring;
2034 struct disklabel *lp = rs->sc_dkdev.dk_label;
2035 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2036 RF_Raid_t *raidPtr;
2037
2038 db1_printf(("Getting the disklabel...\n"));
2039
2040 memset(clp, 0, sizeof(*clp));
2041
2042 raidPtr = raidPtrs[unit];
2043
2044 raidgetdefaultlabel(raidPtr, rs, lp);
2045
2046 /*
2047 * Call the generic disklabel extraction routine.
2048 */
2049 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2050 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2051 if (errstring)
2052 raidmakedisklabel(rs);
2053 else {
2054 int i;
2055 struct partition *pp;
2056
2057 /*
2058 * Sanity check whether the found disklabel is valid.
2059 *
2060 * This is necessary since total size of the raid device
2061 * may vary when an interleave is changed even though exactly
2062 * same componets are used, and old disklabel may used
2063 * if that is found.
2064 */
2065 if (lp->d_secperunit != rs->sc_size)
2066 printf("raid%d: WARNING: %s: "
2067 "total sector size in disklabel (%d) != "
2068 "the size of raid (%ld)\n", unit, rs->sc_xname,
2069 lp->d_secperunit, (long) rs->sc_size);
2070 for (i = 0; i < lp->d_npartitions; i++) {
2071 pp = &lp->d_partitions[i];
2072 if (pp->p_offset + pp->p_size > rs->sc_size)
2073 printf("raid%d: WARNING: %s: end of partition `%c' "
2074 "exceeds the size of raid (%ld)\n",
2075 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2076 }
2077 }
2078
2079 }
2080 /*
2081 * Take care of things one might want to take care of in the event
2082 * that a disklabel isn't present.
2083 */
2084 static void
2085 raidmakedisklabel(rs)
2086 struct raid_softc *rs;
2087 {
2088 struct disklabel *lp = rs->sc_dkdev.dk_label;
2089 db1_printf(("Making a label..\n"));
2090
2091 /*
2092 * For historical reasons, if there's no disklabel present
2093 * the raw partition must be marked FS_BSDFFS.
2094 */
2095
2096 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2097
2098 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2099
2100 lp->d_checksum = dkcksum(lp);
2101 }
2102 /*
2103 * Lookup the provided name in the filesystem. If the file exists,
2104 * is a valid block device, and isn't being used by anyone else,
2105 * set *vpp to the file's vnode.
2106 * You'll find the original of this in ccd.c
2107 */
2108 int
2109 raidlookup(path, p, vpp)
2110 char *path;
2111 struct proc *p;
2112 struct vnode **vpp; /* result */
2113 {
2114 struct nameidata nd;
2115 struct vnode *vp;
2116 struct vattr va;
2117 int error;
2118
2119 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2120 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2121 #if 0
2122 printf("RAIDframe: vn_open returned %d\n", error);
2123 #endif
2124 return (error);
2125 }
2126 vp = nd.ni_vp;
2127 if (vp->v_usecount > 1) {
2128 VOP_UNLOCK(vp, 0);
2129 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2130 return (EBUSY);
2131 }
2132 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2133 VOP_UNLOCK(vp, 0);
2134 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2135 return (error);
2136 }
2137 /* XXX: eventually we should handle VREG, too. */
2138 if (va.va_type != VBLK) {
2139 VOP_UNLOCK(vp, 0);
2140 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2141 return (ENOTBLK);
2142 }
2143 VOP_UNLOCK(vp, 0);
2144 *vpp = vp;
2145 return (0);
2146 }
2147 /*
2148 * Wait interruptibly for an exclusive lock.
2149 *
2150 * XXX
2151 * Several drivers do this; it should be abstracted and made MP-safe.
2152 * (Hmm... where have we seen this warning before :-> GO )
2153 */
2154 static int
2155 raidlock(rs)
2156 struct raid_softc *rs;
2157 {
2158 int error;
2159
2160 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2161 rs->sc_flags |= RAIDF_WANTED;
2162 if ((error =
2163 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2164 return (error);
2165 }
2166 rs->sc_flags |= RAIDF_LOCKED;
2167 return (0);
2168 }
2169 /*
2170 * Unlock and wake up any waiters.
2171 */
2172 static void
2173 raidunlock(rs)
2174 struct raid_softc *rs;
2175 {
2176
2177 rs->sc_flags &= ~RAIDF_LOCKED;
2178 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2179 rs->sc_flags &= ~RAIDF_WANTED;
2180 wakeup(rs);
2181 }
2182 }
2183
2184
2185 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2186 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2187
2188 int
2189 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2190 {
2191 RF_ComponentLabel_t clabel;
2192 raidread_component_label(dev, b_vp, &clabel);
2193 clabel.mod_counter = mod_counter;
2194 clabel.clean = RF_RAID_CLEAN;
2195 raidwrite_component_label(dev, b_vp, &clabel);
2196 return(0);
2197 }
2198
2199
2200 int
2201 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2202 {
2203 RF_ComponentLabel_t clabel;
2204 raidread_component_label(dev, b_vp, &clabel);
2205 clabel.mod_counter = mod_counter;
2206 clabel.clean = RF_RAID_DIRTY;
2207 raidwrite_component_label(dev, b_vp, &clabel);
2208 return(0);
2209 }
2210
2211 /* ARGSUSED */
2212 int
2213 raidread_component_label(dev, b_vp, clabel)
2214 dev_t dev;
2215 struct vnode *b_vp;
2216 RF_ComponentLabel_t *clabel;
2217 {
2218 struct buf *bp;
2219 const struct bdevsw *bdev;
2220 int error;
2221
2222 /* XXX should probably ensure that we don't try to do this if
2223 someone has changed rf_protected_sectors. */
2224
2225 if (b_vp == NULL) {
2226 /* For whatever reason, this component is not valid.
2227 Don't try to read a component label from it. */
2228 return(EINVAL);
2229 }
2230
2231 /* get a block of the appropriate size... */
2232 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2233 bp->b_dev = dev;
2234
2235 /* get our ducks in a row for the read */
2236 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2237 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2238 bp->b_flags |= B_READ;
2239 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2240
2241 bdev = bdevsw_lookup(bp->b_dev);
2242 if (bdev == NULL)
2243 return (ENXIO);
2244 (*bdev->d_strategy)(bp);
2245
2246 error = biowait(bp);
2247
2248 if (!error) {
2249 memcpy(clabel, bp->b_data,
2250 sizeof(RF_ComponentLabel_t));
2251 #if 0
2252 rf_print_component_label( clabel );
2253 #endif
2254 } else {
2255 #if 0
2256 printf("Failed to read RAID component label!\n");
2257 #endif
2258 }
2259
2260 brelse(bp);
2261 return(error);
2262 }
2263 /* ARGSUSED */
2264 int
2265 raidwrite_component_label(dev, b_vp, clabel)
2266 dev_t dev;
2267 struct vnode *b_vp;
2268 RF_ComponentLabel_t *clabel;
2269 {
2270 struct buf *bp;
2271 const struct bdevsw *bdev;
2272 int error;
2273
2274 /* get a block of the appropriate size... */
2275 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2276 bp->b_dev = dev;
2277
2278 /* get our ducks in a row for the write */
2279 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2280 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2281 bp->b_flags |= B_WRITE;
2282 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2283
2284 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2285
2286 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2287
2288 bdev = bdevsw_lookup(bp->b_dev);
2289 if (bdev == NULL)
2290 return (ENXIO);
2291 (*bdev->d_strategy)(bp);
2292 error = biowait(bp);
2293 brelse(bp);
2294 if (error) {
2295 #if 1
2296 printf("Failed to write RAID component info!\n");
2297 #endif
2298 }
2299
2300 return(error);
2301 }
2302
2303 void
2304 rf_markalldirty(raidPtr)
2305 RF_Raid_t *raidPtr;
2306 {
2307 RF_ComponentLabel_t clabel;
2308 int r,c;
2309
2310 raidPtr->mod_counter++;
2311 for (r = 0; r < raidPtr->numRow; r++) {
2312 for (c = 0; c < raidPtr->numCol; c++) {
2313 /* we don't want to touch (at all) a disk that has
2314 failed */
2315 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2316 raidread_component_label(
2317 raidPtr->Disks[r][c].dev,
2318 raidPtr->raid_cinfo[r][c].ci_vp,
2319 &clabel);
2320 if (clabel.status == rf_ds_spared) {
2321 /* XXX do something special...
2322 but whatever you do, don't
2323 try to access it!! */
2324 } else {
2325 #if 0
2326 clabel.status =
2327 raidPtr->Disks[r][c].status;
2328 raidwrite_component_label(
2329 raidPtr->Disks[r][c].dev,
2330 raidPtr->raid_cinfo[r][c].ci_vp,
2331 &clabel);
2332 #endif
2333 raidmarkdirty(
2334 raidPtr->Disks[r][c].dev,
2335 raidPtr->raid_cinfo[r][c].ci_vp,
2336 raidPtr->mod_counter);
2337 }
2338 }
2339 }
2340 }
2341 /* printf("Component labels marked dirty.\n"); */
2342 #if 0
2343 for( c = 0; c < raidPtr->numSpare ; c++) {
2344 sparecol = raidPtr->numCol + c;
2345 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2346 /*
2347
2348 XXX this is where we get fancy and map this spare
2349 into it's correct spot in the array.
2350
2351 */
2352 /*
2353
2354 we claim this disk is "optimal" if it's
2355 rf_ds_used_spare, as that means it should be
2356 directly substitutable for the disk it replaced.
2357 We note that too...
2358
2359 */
2360
2361 for(i=0;i<raidPtr->numRow;i++) {
2362 for(j=0;j<raidPtr->numCol;j++) {
2363 if ((raidPtr->Disks[i][j].spareRow ==
2364 r) &&
2365 (raidPtr->Disks[i][j].spareCol ==
2366 sparecol)) {
2367 srow = r;
2368 scol = sparecol;
2369 break;
2370 }
2371 }
2372 }
2373
2374 raidread_component_label(
2375 raidPtr->Disks[r][sparecol].dev,
2376 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2377 &clabel);
2378 /* make sure status is noted */
2379 clabel.version = RF_COMPONENT_LABEL_VERSION;
2380 clabel.mod_counter = raidPtr->mod_counter;
2381 clabel.serial_number = raidPtr->serial_number;
2382 clabel.row = srow;
2383 clabel.column = scol;
2384 clabel.num_rows = raidPtr->numRow;
2385 clabel.num_columns = raidPtr->numCol;
2386 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2387 clabel.status = rf_ds_optimal;
2388 raidwrite_component_label(
2389 raidPtr->Disks[r][sparecol].dev,
2390 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2391 &clabel);
2392 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2393 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2394 }
2395 }
2396
2397 #endif
2398 }
2399
2400
2401 void
2402 rf_update_component_labels(raidPtr, final)
2403 RF_Raid_t *raidPtr;
2404 int final;
2405 {
2406 RF_ComponentLabel_t clabel;
2407 int sparecol;
2408 int r,c;
2409 int i,j;
2410 int srow, scol;
2411
2412 srow = -1;
2413 scol = -1;
2414
2415 /* XXX should do extra checks to make sure things really are clean,
2416 rather than blindly setting the clean bit... */
2417
2418 raidPtr->mod_counter++;
2419
2420 for (r = 0; r < raidPtr->numRow; r++) {
2421 for (c = 0; c < raidPtr->numCol; c++) {
2422 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2423 raidread_component_label(
2424 raidPtr->Disks[r][c].dev,
2425 raidPtr->raid_cinfo[r][c].ci_vp,
2426 &clabel);
2427 /* make sure status is noted */
2428 clabel.status = rf_ds_optimal;
2429 /* bump the counter */
2430 clabel.mod_counter = raidPtr->mod_counter;
2431
2432 raidwrite_component_label(
2433 raidPtr->Disks[r][c].dev,
2434 raidPtr->raid_cinfo[r][c].ci_vp,
2435 &clabel);
2436 if (final == RF_FINAL_COMPONENT_UPDATE) {
2437 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2438 raidmarkclean(
2439 raidPtr->Disks[r][c].dev,
2440 raidPtr->raid_cinfo[r][c].ci_vp,
2441 raidPtr->mod_counter);
2442 }
2443 }
2444 }
2445 /* else we don't touch it.. */
2446 }
2447 }
2448
2449 for( c = 0; c < raidPtr->numSpare ; c++) {
2450 sparecol = raidPtr->numCol + c;
2451 /* Need to ensure that the reconstruct actually completed! */
2452 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2453 /*
2454
2455 we claim this disk is "optimal" if it's
2456 rf_ds_used_spare, as that means it should be
2457 directly substitutable for the disk it replaced.
2458 We note that too...
2459
2460 */
2461
2462 for(i=0;i<raidPtr->numRow;i++) {
2463 for(j=0;j<raidPtr->numCol;j++) {
2464 if ((raidPtr->Disks[i][j].spareRow ==
2465 0) &&
2466 (raidPtr->Disks[i][j].spareCol ==
2467 sparecol)) {
2468 srow = i;
2469 scol = j;
2470 break;
2471 }
2472 }
2473 }
2474
2475 /* XXX shouldn't *really* need this... */
2476 raidread_component_label(
2477 raidPtr->Disks[0][sparecol].dev,
2478 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2479 &clabel);
2480 /* make sure status is noted */
2481
2482 raid_init_component_label(raidPtr, &clabel);
2483
2484 clabel.mod_counter = raidPtr->mod_counter;
2485 clabel.row = srow;
2486 clabel.column = scol;
2487 clabel.status = rf_ds_optimal;
2488
2489 raidwrite_component_label(
2490 raidPtr->Disks[0][sparecol].dev,
2491 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2492 &clabel);
2493 if (final == RF_FINAL_COMPONENT_UPDATE) {
2494 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2495 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2496 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2497 raidPtr->mod_counter);
2498 }
2499 }
2500 }
2501 }
2502 /* printf("Component labels updated\n"); */
2503 }
2504
2505 void
2506 rf_close_component(raidPtr, vp, auto_configured)
2507 RF_Raid_t *raidPtr;
2508 struct vnode *vp;
2509 int auto_configured;
2510 {
2511 struct proc *p;
2512
2513 p = raidPtr->engine_thread;
2514
2515 if (vp != NULL) {
2516 if (auto_configured == 1) {
2517 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2518 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2519 vput(vp);
2520
2521 } else {
2522 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2523 }
2524 } else {
2525 #if 0
2526 printf("vnode was NULL\n");
2527 #endif
2528 }
2529 }
2530
2531
2532 void
2533 rf_UnconfigureVnodes(raidPtr)
2534 RF_Raid_t *raidPtr;
2535 {
2536 int r,c;
2537 struct proc *p;
2538 struct vnode *vp;
2539 int acd;
2540
2541
2542 /* We take this opportunity to close the vnodes like we should.. */
2543
2544 p = raidPtr->engine_thread;
2545
2546 for (r = 0; r < raidPtr->numRow; r++) {
2547 for (c = 0; c < raidPtr->numCol; c++) {
2548 #if 0
2549 printf("raid%d: Closing vnode for row: %d col: %d\n",
2550 raidPtr->raidid, r, c);
2551 #endif
2552 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2553 acd = raidPtr->Disks[r][c].auto_configured;
2554 rf_close_component(raidPtr, vp, acd);
2555 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2556 raidPtr->Disks[r][c].auto_configured = 0;
2557 }
2558 }
2559 for (r = 0; r < raidPtr->numSpare; r++) {
2560 #if 0
2561 printf("raid%d: Closing vnode for spare: %d\n",
2562 raidPtr->raidid, r);
2563 #endif
2564 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2565 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2566 rf_close_component(raidPtr, vp, acd);
2567 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2568 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2569 }
2570 }
2571
2572
2573 void
2574 rf_ReconThread(req)
2575 struct rf_recon_req *req;
2576 {
2577 int s;
2578 RF_Raid_t *raidPtr;
2579
2580 s = splbio();
2581 raidPtr = (RF_Raid_t *) req->raidPtr;
2582 raidPtr->recon_in_progress = 1;
2583
2584 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2585 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2586
2587 /* XXX get rid of this! we don't need it at all.. */
2588 RF_Free(req, sizeof(*req));
2589
2590 raidPtr->recon_in_progress = 0;
2591 splx(s);
2592
2593 /* That's all... */
2594 kthread_exit(0); /* does not return */
2595 }
2596
2597 void
2598 rf_RewriteParityThread(raidPtr)
2599 RF_Raid_t *raidPtr;
2600 {
2601 int retcode;
2602 int s;
2603
2604 raidPtr->parity_rewrite_in_progress = 1;
2605 s = splbio();
2606 retcode = rf_RewriteParity(raidPtr);
2607 splx(s);
2608 if (retcode) {
2609 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2610 } else {
2611 /* set the clean bit! If we shutdown correctly,
2612 the clean bit on each component label will get
2613 set */
2614 raidPtr->parity_good = RF_RAID_CLEAN;
2615 }
2616 raidPtr->parity_rewrite_in_progress = 0;
2617
2618 /* Anyone waiting for us to stop? If so, inform them... */
2619 if (raidPtr->waitShutdown) {
2620 wakeup(&raidPtr->parity_rewrite_in_progress);
2621 }
2622
2623 /* That's all... */
2624 kthread_exit(0); /* does not return */
2625 }
2626
2627
2628 void
2629 rf_CopybackThread(raidPtr)
2630 RF_Raid_t *raidPtr;
2631 {
2632 int s;
2633
2634 raidPtr->copyback_in_progress = 1;
2635 s = splbio();
2636 rf_CopybackReconstructedData(raidPtr);
2637 splx(s);
2638 raidPtr->copyback_in_progress = 0;
2639
2640 /* That's all... */
2641 kthread_exit(0); /* does not return */
2642 }
2643
2644
2645 void
2646 rf_ReconstructInPlaceThread(req)
2647 struct rf_recon_req *req;
2648 {
2649 int retcode;
2650 int s;
2651 RF_Raid_t *raidPtr;
2652
2653 s = splbio();
2654 raidPtr = req->raidPtr;
2655 raidPtr->recon_in_progress = 1;
2656 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2657 RF_Free(req, sizeof(*req));
2658 raidPtr->recon_in_progress = 0;
2659 splx(s);
2660
2661 /* That's all... */
2662 kthread_exit(0); /* does not return */
2663 }
2664
2665 RF_AutoConfig_t *
2666 rf_find_raid_components()
2667 {
2668 struct vnode *vp;
2669 struct disklabel label;
2670 struct device *dv;
2671 dev_t dev;
2672 int bmajor;
2673 int error;
2674 int i;
2675 int good_one;
2676 RF_ComponentLabel_t *clabel;
2677 RF_AutoConfig_t *ac_list;
2678 RF_AutoConfig_t *ac;
2679
2680
2681 /* initialize the AutoConfig list */
2682 ac_list = NULL;
2683
2684 /* we begin by trolling through *all* the devices on the system */
2685
2686 for (dv = alldevs.tqh_first; dv != NULL;
2687 dv = dv->dv_list.tqe_next) {
2688
2689 /* we are only interested in disks... */
2690 if (dv->dv_class != DV_DISK)
2691 continue;
2692
2693 /* we don't care about floppies... */
2694 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2695 continue;
2696 }
2697
2698 /* we don't care about CD's... */
2699 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"cd")) {
2700 continue;
2701 }
2702
2703 /* hdfd is the Atari/Hades floppy driver */
2704 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
2705 continue;
2706 }
2707 /* fdisa is the Atari/Milan floppy driver */
2708 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fdisa")) {
2709 continue;
2710 }
2711
2712 /* need to find the device_name_to_block_device_major stuff */
2713 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2714
2715 /* get a vnode for the raw partition of this disk */
2716
2717 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2718 if (bdevvp(dev, &vp))
2719 panic("RAID can't alloc vnode");
2720
2721 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2722
2723 if (error) {
2724 /* "Who cares." Continue looking
2725 for something that exists*/
2726 vput(vp);
2727 continue;
2728 }
2729
2730 /* Ok, the disk exists. Go get the disklabel. */
2731 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2732 FREAD, NOCRED, 0);
2733 if (error) {
2734 /*
2735 * XXX can't happen - open() would
2736 * have errored out (or faked up one)
2737 */
2738 printf("can't get label for dev %s%c (%d)!?!?\n",
2739 dv->dv_xname, 'a' + RAW_PART, error);
2740 }
2741
2742 /* don't need this any more. We'll allocate it again
2743 a little later if we really do... */
2744 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2745 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2746 vput(vp);
2747
2748 for (i=0; i < label.d_npartitions; i++) {
2749 /* We only support partitions marked as RAID */
2750 if (label.d_partitions[i].p_fstype != FS_RAID)
2751 continue;
2752
2753 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2754 if (bdevvp(dev, &vp))
2755 panic("RAID can't alloc vnode");
2756
2757 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2758 if (error) {
2759 /* Whatever... */
2760 vput(vp);
2761 continue;
2762 }
2763
2764 good_one = 0;
2765
2766 clabel = (RF_ComponentLabel_t *)
2767 malloc(sizeof(RF_ComponentLabel_t),
2768 M_RAIDFRAME, M_NOWAIT);
2769 if (clabel == NULL) {
2770 /* XXX CLEANUP HERE */
2771 printf("RAID auto config: out of memory!\n");
2772 return(NULL); /* XXX probably should panic? */
2773 }
2774
2775 if (!raidread_component_label(dev, vp, clabel)) {
2776 /* Got the label. Does it look reasonable? */
2777 if (rf_reasonable_label(clabel) &&
2778 (clabel->partitionSize <=
2779 label.d_partitions[i].p_size)) {
2780 #if DEBUG
2781 printf("Component on: %s%c: %d\n",
2782 dv->dv_xname, 'a'+i,
2783 label.d_partitions[i].p_size);
2784 rf_print_component_label(clabel);
2785 #endif
2786 /* if it's reasonable, add it,
2787 else ignore it. */
2788 ac = (RF_AutoConfig_t *)
2789 malloc(sizeof(RF_AutoConfig_t),
2790 M_RAIDFRAME,
2791 M_NOWAIT);
2792 if (ac == NULL) {
2793 /* XXX should panic?? */
2794 return(NULL);
2795 }
2796
2797 sprintf(ac->devname, "%s%c",
2798 dv->dv_xname, 'a'+i);
2799 ac->dev = dev;
2800 ac->vp = vp;
2801 ac->clabel = clabel;
2802 ac->next = ac_list;
2803 ac_list = ac;
2804 good_one = 1;
2805 }
2806 }
2807 if (!good_one) {
2808 /* cleanup */
2809 free(clabel, M_RAIDFRAME);
2810 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2811 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2812 vput(vp);
2813 }
2814 }
2815 }
2816 return(ac_list);
2817 }
2818
2819 static int
2820 rf_reasonable_label(clabel)
2821 RF_ComponentLabel_t *clabel;
2822 {
2823
2824 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2825 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2826 ((clabel->clean == RF_RAID_CLEAN) ||
2827 (clabel->clean == RF_RAID_DIRTY)) &&
2828 clabel->row >=0 &&
2829 clabel->column >= 0 &&
2830 clabel->num_rows > 0 &&
2831 clabel->num_columns > 0 &&
2832 clabel->row < clabel->num_rows &&
2833 clabel->column < clabel->num_columns &&
2834 clabel->blockSize > 0 &&
2835 clabel->numBlocks > 0) {
2836 /* label looks reasonable enough... */
2837 return(1);
2838 }
2839 return(0);
2840 }
2841
2842
2843 void
2844 rf_print_component_label(clabel)
2845 RF_ComponentLabel_t *clabel;
2846 {
2847 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2848 clabel->row, clabel->column,
2849 clabel->num_rows, clabel->num_columns);
2850 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2851 clabel->version, clabel->serial_number,
2852 clabel->mod_counter);
2853 printf(" Clean: %s Status: %d\n",
2854 clabel->clean ? "Yes" : "No", clabel->status );
2855 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2856 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2857 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2858 (char) clabel->parityConfig, clabel->blockSize,
2859 clabel->numBlocks);
2860 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2861 printf(" Contains root partition: %s\n",
2862 clabel->root_partition ? "Yes" : "No" );
2863 printf(" Last configured as: raid%d\n", clabel->last_unit );
2864 #if 0
2865 printf(" Config order: %d\n", clabel->config_order);
2866 #endif
2867
2868 }
2869
2870 RF_ConfigSet_t *
2871 rf_create_auto_sets(ac_list)
2872 RF_AutoConfig_t *ac_list;
2873 {
2874 RF_AutoConfig_t *ac;
2875 RF_ConfigSet_t *config_sets;
2876 RF_ConfigSet_t *cset;
2877 RF_AutoConfig_t *ac_next;
2878
2879
2880 config_sets = NULL;
2881
2882 /* Go through the AutoConfig list, and figure out which components
2883 belong to what sets. */
2884 ac = ac_list;
2885 while(ac!=NULL) {
2886 /* we're going to putz with ac->next, so save it here
2887 for use at the end of the loop */
2888 ac_next = ac->next;
2889
2890 if (config_sets == NULL) {
2891 /* will need at least this one... */
2892 config_sets = (RF_ConfigSet_t *)
2893 malloc(sizeof(RF_ConfigSet_t),
2894 M_RAIDFRAME, M_NOWAIT);
2895 if (config_sets == NULL) {
2896 panic("rf_create_auto_sets: No memory!\n");
2897 }
2898 /* this one is easy :) */
2899 config_sets->ac = ac;
2900 config_sets->next = NULL;
2901 config_sets->rootable = 0;
2902 ac->next = NULL;
2903 } else {
2904 /* which set does this component fit into? */
2905 cset = config_sets;
2906 while(cset!=NULL) {
2907 if (rf_does_it_fit(cset, ac)) {
2908 /* looks like it matches... */
2909 ac->next = cset->ac;
2910 cset->ac = ac;
2911 break;
2912 }
2913 cset = cset->next;
2914 }
2915 if (cset==NULL) {
2916 /* didn't find a match above... new set..*/
2917 cset = (RF_ConfigSet_t *)
2918 malloc(sizeof(RF_ConfigSet_t),
2919 M_RAIDFRAME, M_NOWAIT);
2920 if (cset == NULL) {
2921 panic("rf_create_auto_sets: No memory!\n");
2922 }
2923 cset->ac = ac;
2924 ac->next = NULL;
2925 cset->next = config_sets;
2926 cset->rootable = 0;
2927 config_sets = cset;
2928 }
2929 }
2930 ac = ac_next;
2931 }
2932
2933
2934 return(config_sets);
2935 }
2936
2937 static int
2938 rf_does_it_fit(cset, ac)
2939 RF_ConfigSet_t *cset;
2940 RF_AutoConfig_t *ac;
2941 {
2942 RF_ComponentLabel_t *clabel1, *clabel2;
2943
2944 /* If this one matches the *first* one in the set, that's good
2945 enough, since the other members of the set would have been
2946 through here too... */
2947 /* note that we are not checking partitionSize here..
2948
2949 Note that we are also not checking the mod_counters here.
2950 If everything else matches execpt the mod_counter, that's
2951 good enough for this test. We will deal with the mod_counters
2952 a little later in the autoconfiguration process.
2953
2954 (clabel1->mod_counter == clabel2->mod_counter) &&
2955
2956 The reason we don't check for this is that failed disks
2957 will have lower modification counts. If those disks are
2958 not added to the set they used to belong to, then they will
2959 form their own set, which may result in 2 different sets,
2960 for example, competing to be configured at raid0, and
2961 perhaps competing to be the root filesystem set. If the
2962 wrong ones get configured, or both attempt to become /,
2963 weird behaviour and or serious lossage will occur. Thus we
2964 need to bring them into the fold here, and kick them out at
2965 a later point.
2966
2967 */
2968
2969 clabel1 = cset->ac->clabel;
2970 clabel2 = ac->clabel;
2971 if ((clabel1->version == clabel2->version) &&
2972 (clabel1->serial_number == clabel2->serial_number) &&
2973 (clabel1->num_rows == clabel2->num_rows) &&
2974 (clabel1->num_columns == clabel2->num_columns) &&
2975 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2976 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2977 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2978 (clabel1->parityConfig == clabel2->parityConfig) &&
2979 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2980 (clabel1->blockSize == clabel2->blockSize) &&
2981 (clabel1->numBlocks == clabel2->numBlocks) &&
2982 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2983 (clabel1->root_partition == clabel2->root_partition) &&
2984 (clabel1->last_unit == clabel2->last_unit) &&
2985 (clabel1->config_order == clabel2->config_order)) {
2986 /* if it get's here, it almost *has* to be a match */
2987 } else {
2988 /* it's not consistent with somebody in the set..
2989 punt */
2990 return(0);
2991 }
2992 /* all was fine.. it must fit... */
2993 return(1);
2994 }
2995
2996 int
2997 rf_have_enough_components(cset)
2998 RF_ConfigSet_t *cset;
2999 {
3000 RF_AutoConfig_t *ac;
3001 RF_AutoConfig_t *auto_config;
3002 RF_ComponentLabel_t *clabel;
3003 int r,c;
3004 int num_rows;
3005 int num_cols;
3006 int num_missing;
3007 int mod_counter;
3008 int mod_counter_found;
3009 int even_pair_failed;
3010 char parity_type;
3011
3012
3013 /* check to see that we have enough 'live' components
3014 of this set. If so, we can configure it if necessary */
3015
3016 num_rows = cset->ac->clabel->num_rows;
3017 num_cols = cset->ac->clabel->num_columns;
3018 parity_type = cset->ac->clabel->parityConfig;
3019
3020 /* XXX Check for duplicate components!?!?!? */
3021
3022 /* Determine what the mod_counter is supposed to be for this set. */
3023
3024 mod_counter_found = 0;
3025 mod_counter = 0;
3026 ac = cset->ac;
3027 while(ac!=NULL) {
3028 if (mod_counter_found==0) {
3029 mod_counter = ac->clabel->mod_counter;
3030 mod_counter_found = 1;
3031 } else {
3032 if (ac->clabel->mod_counter > mod_counter) {
3033 mod_counter = ac->clabel->mod_counter;
3034 }
3035 }
3036 ac = ac->next;
3037 }
3038
3039 num_missing = 0;
3040 auto_config = cset->ac;
3041
3042 for(r=0; r<num_rows; r++) {
3043 even_pair_failed = 0;
3044 for(c=0; c<num_cols; c++) {
3045 ac = auto_config;
3046 while(ac!=NULL) {
3047 if ((ac->clabel->row == r) &&
3048 (ac->clabel->column == c) &&
3049 (ac->clabel->mod_counter == mod_counter)) {
3050 /* it's this one... */
3051 #if DEBUG
3052 printf("Found: %s at %d,%d\n",
3053 ac->devname,r,c);
3054 #endif
3055 break;
3056 }
3057 ac=ac->next;
3058 }
3059 if (ac==NULL) {
3060 /* Didn't find one here! */
3061 /* special case for RAID 1, especially
3062 where there are more than 2
3063 components (where RAIDframe treats
3064 things a little differently :( ) */
3065 if (parity_type == '1') {
3066 if (c%2 == 0) { /* even component */
3067 even_pair_failed = 1;
3068 } else { /* odd component. If
3069 we're failed, and
3070 so is the even
3071 component, it's
3072 "Good Night, Charlie" */
3073 if (even_pair_failed == 1) {
3074 return(0);
3075 }
3076 }
3077 } else {
3078 /* normal accounting */
3079 num_missing++;
3080 }
3081 }
3082 if ((parity_type == '1') && (c%2 == 1)) {
3083 /* Just did an even component, and we didn't
3084 bail.. reset the even_pair_failed flag,
3085 and go on to the next component.... */
3086 even_pair_failed = 0;
3087 }
3088 }
3089 }
3090
3091 clabel = cset->ac->clabel;
3092
3093 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3094 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3095 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3096 /* XXX this needs to be made *much* more general */
3097 /* Too many failures */
3098 return(0);
3099 }
3100 /* otherwise, all is well, and we've got enough to take a kick
3101 at autoconfiguring this set */
3102 return(1);
3103 }
3104
3105 void
3106 rf_create_configuration(ac,config,raidPtr)
3107 RF_AutoConfig_t *ac;
3108 RF_Config_t *config;
3109 RF_Raid_t *raidPtr;
3110 {
3111 RF_ComponentLabel_t *clabel;
3112 int i;
3113
3114 clabel = ac->clabel;
3115
3116 /* 1. Fill in the common stuff */
3117 config->numRow = clabel->num_rows;
3118 config->numCol = clabel->num_columns;
3119 config->numSpare = 0; /* XXX should this be set here? */
3120 config->sectPerSU = clabel->sectPerSU;
3121 config->SUsPerPU = clabel->SUsPerPU;
3122 config->SUsPerRU = clabel->SUsPerRU;
3123 config->parityConfig = clabel->parityConfig;
3124 /* XXX... */
3125 strcpy(config->diskQueueType,"fifo");
3126 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3127 config->layoutSpecificSize = 0; /* XXX ?? */
3128
3129 while(ac!=NULL) {
3130 /* row/col values will be in range due to the checks
3131 in reasonable_label() */
3132 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3133 ac->devname);
3134 ac = ac->next;
3135 }
3136
3137 for(i=0;i<RF_MAXDBGV;i++) {
3138 config->debugVars[i][0] = NULL;
3139 }
3140 }
3141
3142 int
3143 rf_set_autoconfig(raidPtr, new_value)
3144 RF_Raid_t *raidPtr;
3145 int new_value;
3146 {
3147 RF_ComponentLabel_t clabel;
3148 struct vnode *vp;
3149 dev_t dev;
3150 int row, column;
3151
3152 raidPtr->autoconfigure = new_value;
3153 for(row=0; row<raidPtr->numRow; row++) {
3154 for(column=0; column<raidPtr->numCol; column++) {
3155 if (raidPtr->Disks[row][column].status ==
3156 rf_ds_optimal) {
3157 dev = raidPtr->Disks[row][column].dev;
3158 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3159 raidread_component_label(dev, vp, &clabel);
3160 clabel.autoconfigure = new_value;
3161 raidwrite_component_label(dev, vp, &clabel);
3162 }
3163 }
3164 }
3165 return(new_value);
3166 }
3167
3168 int
3169 rf_set_rootpartition(raidPtr, new_value)
3170 RF_Raid_t *raidPtr;
3171 int new_value;
3172 {
3173 RF_ComponentLabel_t clabel;
3174 struct vnode *vp;
3175 dev_t dev;
3176 int row, column;
3177
3178 raidPtr->root_partition = new_value;
3179 for(row=0; row<raidPtr->numRow; row++) {
3180 for(column=0; column<raidPtr->numCol; column++) {
3181 if (raidPtr->Disks[row][column].status ==
3182 rf_ds_optimal) {
3183 dev = raidPtr->Disks[row][column].dev;
3184 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3185 raidread_component_label(dev, vp, &clabel);
3186 clabel.root_partition = new_value;
3187 raidwrite_component_label(dev, vp, &clabel);
3188 }
3189 }
3190 }
3191 return(new_value);
3192 }
3193
3194 void
3195 rf_release_all_vps(cset)
3196 RF_ConfigSet_t *cset;
3197 {
3198 RF_AutoConfig_t *ac;
3199
3200 ac = cset->ac;
3201 while(ac!=NULL) {
3202 /* Close the vp, and give it back */
3203 if (ac->vp) {
3204 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3205 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3206 vput(ac->vp);
3207 ac->vp = NULL;
3208 }
3209 ac = ac->next;
3210 }
3211 }
3212
3213
3214 void
3215 rf_cleanup_config_set(cset)
3216 RF_ConfigSet_t *cset;
3217 {
3218 RF_AutoConfig_t *ac;
3219 RF_AutoConfig_t *next_ac;
3220
3221 ac = cset->ac;
3222 while(ac!=NULL) {
3223 next_ac = ac->next;
3224 /* nuke the label */
3225 free(ac->clabel, M_RAIDFRAME);
3226 /* cleanup the config structure */
3227 free(ac, M_RAIDFRAME);
3228 /* "next.." */
3229 ac = next_ac;
3230 }
3231 /* and, finally, nuke the config set */
3232 free(cset, M_RAIDFRAME);
3233 }
3234
3235
3236 void
3237 raid_init_component_label(raidPtr, clabel)
3238 RF_Raid_t *raidPtr;
3239 RF_ComponentLabel_t *clabel;
3240 {
3241 /* current version number */
3242 clabel->version = RF_COMPONENT_LABEL_VERSION;
3243 clabel->serial_number = raidPtr->serial_number;
3244 clabel->mod_counter = raidPtr->mod_counter;
3245 clabel->num_rows = raidPtr->numRow;
3246 clabel->num_columns = raidPtr->numCol;
3247 clabel->clean = RF_RAID_DIRTY; /* not clean */
3248 clabel->status = rf_ds_optimal; /* "It's good!" */
3249
3250 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3251 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3252 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3253
3254 clabel->blockSize = raidPtr->bytesPerSector;
3255 clabel->numBlocks = raidPtr->sectorsPerDisk;
3256
3257 /* XXX not portable */
3258 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3259 clabel->maxOutstanding = raidPtr->maxOutstanding;
3260 clabel->autoconfigure = raidPtr->autoconfigure;
3261 clabel->root_partition = raidPtr->root_partition;
3262 clabel->last_unit = raidPtr->raidid;
3263 clabel->config_order = raidPtr->config_order;
3264 }
3265
3266 int
3267 rf_auto_config_set(cset,unit)
3268 RF_ConfigSet_t *cset;
3269 int *unit;
3270 {
3271 RF_Raid_t *raidPtr;
3272 RF_Config_t *config;
3273 int raidID;
3274 int retcode;
3275
3276 #if DEBUG
3277 printf("RAID autoconfigure\n");
3278 #endif
3279
3280 retcode = 0;
3281 *unit = -1;
3282
3283 /* 1. Create a config structure */
3284
3285 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3286 M_RAIDFRAME,
3287 M_NOWAIT);
3288 if (config==NULL) {
3289 printf("Out of mem!?!?\n");
3290 /* XXX do something more intelligent here. */
3291 return(1);
3292 }
3293
3294 memset(config, 0, sizeof(RF_Config_t));
3295
3296 /*
3297 2. Figure out what RAID ID this one is supposed to live at
3298 See if we can get the same RAID dev that it was configured
3299 on last time..
3300 */
3301
3302 raidID = cset->ac->clabel->last_unit;
3303 if ((raidID < 0) || (raidID >= numraid)) {
3304 /* let's not wander off into lala land. */
3305 raidID = numraid - 1;
3306 }
3307 if (raidPtrs[raidID]->valid != 0) {
3308
3309 /*
3310 Nope... Go looking for an alternative...
3311 Start high so we don't immediately use raid0 if that's
3312 not taken.
3313 */
3314
3315 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3316 if (raidPtrs[raidID]->valid == 0) {
3317 /* can use this one! */
3318 break;
3319 }
3320 }
3321 }
3322
3323 if (raidID < 0) {
3324 /* punt... */
3325 printf("Unable to auto configure this set!\n");
3326 printf("(Out of RAID devs!)\n");
3327 return(1);
3328 }
3329
3330 #if DEBUG
3331 printf("Configuring raid%d:\n",raidID);
3332 #endif
3333
3334 raidPtr = raidPtrs[raidID];
3335
3336 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3337 raidPtr->raidid = raidID;
3338 raidPtr->openings = RAIDOUTSTANDING;
3339
3340 /* 3. Build the configuration structure */
3341 rf_create_configuration(cset->ac, config, raidPtr);
3342
3343 /* 4. Do the configuration */
3344 retcode = rf_Configure(raidPtr, config, cset->ac);
3345
3346 if (retcode == 0) {
3347
3348 raidinit(raidPtrs[raidID]);
3349
3350 rf_markalldirty(raidPtrs[raidID]);
3351 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3352 if (cset->ac->clabel->root_partition==1) {
3353 /* everything configured just fine. Make a note
3354 that this set is eligible to be root. */
3355 cset->rootable = 1;
3356 /* XXX do this here? */
3357 raidPtrs[raidID]->root_partition = 1;
3358 }
3359 }
3360
3361 /* 5. Cleanup */
3362 free(config, M_RAIDFRAME);
3363
3364 *unit = raidID;
3365 return(retcode);
3366 }
3367
3368 void
3369 rf_disk_unbusy(desc)
3370 RF_RaidAccessDesc_t *desc;
3371 {
3372 struct buf *bp;
3373
3374 bp = (struct buf *)desc->bp;
3375 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3376 (bp->b_bcount - bp->b_resid));
3377 }
3378