      1 /*	$NetBSD: rf_netbsdkintf.c,v 1.104.2.14 2002/08/13 02:19:53 nathanw Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
    101  * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/cdefs.h>
117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.104.2.14 2002/08/13 02:19:53 nathanw Exp $");
118
119 #include <sys/param.h>
120 #include <sys/errno.h>
121 #include <sys/pool.h>
122 #include <sys/proc.h>
123 #include <sys/queue.h>
124 #include <sys/disk.h>
125 #include <sys/device.h>
126 #include <sys/stat.h>
127 #include <sys/ioctl.h>
128 #include <sys/fcntl.h>
129 #include <sys/systm.h>
130 #include <sys/namei.h>
131 #include <sys/vnode.h>
132 #include <sys/disklabel.h>
133 #include <sys/conf.h>
134 #include <sys/lock.h>
135 #include <sys/buf.h>
136 #include <sys/user.h>
137 #include <sys/reboot.h>
138
139 #include <dev/raidframe/raidframevar.h>
140 #include <dev/raidframe/raidframeio.h>
141 #include "raid.h"
142 #include "opt_raid_autoconfig.h"
143 #include "rf_raid.h"
144 #include "rf_copyback.h"
145 #include "rf_dag.h"
146 #include "rf_dagflags.h"
147 #include "rf_desc.h"
148 #include "rf_diskqueue.h"
149 #include "rf_etimer.h"
150 #include "rf_general.h"
151 #include "rf_kintf.h"
152 #include "rf_options.h"
153 #include "rf_driver.h"
154 #include "rf_parityscan.h"
155 #include "rf_threadstuff.h"
156
157 int rf_kdebug_level = 0;
158
159 #ifdef DEBUG
160 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
161 #else /* DEBUG */
162 #define db1_printf(a) { }
163 #endif /* DEBUG */
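/*
 * With a DEBUG kernel, raising rf_kdebug_level above zero enables the
 * db1_printf() diagnostics used throughout this file; without DEBUG,
 * db1_printf() expands to an empty statement.
 */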
164
165 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
166
167 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
168
169 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
170 * spare table */
171 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
172 * installation process */
173
174 /* prototypes */
175 static void KernelWakeupFunc(struct buf * bp);
176 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
177 dev_t dev, RF_SectorNum_t startSect,
178 RF_SectorCount_t numSect, caddr_t buf,
179 void (*cbFunc) (struct buf *), void *cbArg,
180 int logBytesPerSector, struct proc * b_proc);
181 static void raidinit(RF_Raid_t *);
182
183 void raidattach(int);
184 int raidsize(dev_t);
185 int raidopen(dev_t, int, int, struct proc *);
186 int raidclose(dev_t, int, int, struct proc *);
187 int raidioctl(dev_t, u_long, caddr_t, int, struct proc *);
188 int raidwrite(dev_t, struct uio *, int);
189 int raidread(dev_t, struct uio *, int);
190 void raidstrategy(struct buf *);
191 int raiddump(dev_t, daddr_t, caddr_t, size_t);
192
193 /*
194 * Pilfered from ccd.c
195 */
196
197 struct raidbuf {
198 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
199 struct buf *rf_obp; /* ptr. to original I/O buf */
200 int rf_flags; /* misc. flags */
201 RF_DiskQueueData_t *req;/* the request that this was part of.. */
202 };
203
204 /* component buffer pool */
205 struct pool raidframe_cbufpool;
206
207 #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
208 #define RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
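/*
 * XXX Note that pool_get() with PR_NOWAIT may return NULL under memory
 * pressure; rf_DispatchKernelIO() below does not currently check for that.
 */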
209
210 /* XXX Not sure if the following should be replacing the raidPtrs above,
211 or if it should be used in conjunction with that...
212 */
213
214 struct raid_softc {
215 int sc_flags; /* flags */
216 int sc_cflags; /* configuration flags */
217 size_t sc_size; /* size of the raid device */
218 char sc_xname[20]; /* XXX external name */
219 struct disk sc_dkdev; /* generic disk device info */
220 struct bufq_state buf_queue; /* used for the device queue */
221 };
222 /* sc_flags */
223 #define RAIDF_INITED 0x01 /* unit has been initialized */
224 #define RAIDF_WLABEL 0x02 /* label area is writable */
225 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
226 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
227 #define RAIDF_LOCKED 0x80 /* unit is locked */
228
229 #define raidunit(x) DISKUNIT(x)
230 int numraid = 0;
231
232 /*
233 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
234 * Be aware that large numbers can allow the driver to consume a lot of
235 * kernel memory, especially on writes, and in degraded mode reads.
236 *
237 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
238 * a single 64K write will typically require 64K for the old data,
239 * 64K for the old parity, and 64K for the new parity, for a total
240 * of 192K (if the parity buffer is not re-used immediately).
    241  * Even if it is used immediately, that's still 128K, which when multiplied
242 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
243 *
244 * Now in degraded mode, for example, a 64K read on the above setup may
245 * require data reconstruction, which will require *all* of the 4 remaining
246 * disks to participate -- 4 * 32K/disk == 128K again.
247 */
248
249 #ifndef RAIDOUTSTANDING
250 #define RAIDOUTSTANDING 6
251 #endif
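/*
 * Purely illustrative sketch of the arithmetic in the comment above.  The
 * helper below is hypothetical and is not referenced anywhere in the
 * driver, hence the "#if 0".
 */
#if 0
static size_t
rf_smallwrite_memory_estimate(size_t io_bytes, int outstanding, int parity_reused)
{
	size_t per_req;

	/* old data + old parity, plus a new parity buffer if it isn't re-used */
	per_req = io_bytes * (parity_reused ? 2 : 3);

	/* ...on top of the incoming data itself, for each outstanding request */
	return ((size_t) outstanding * (per_req + io_bytes));
}
#endif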
252
253 #define RAIDLABELDEV(dev) \
254 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
255
256 /* declared here, and made public, for the benefit of KVM stuff.. */
257 struct raid_softc *raid_softc;
258
259 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
260 struct disklabel *);
261 static void raidgetdisklabel(dev_t);
262 static void raidmakedisklabel(struct raid_softc *);
263
264 static int raidlock(struct raid_softc *);
265 static void raidunlock(struct raid_softc *);
266
267 static void rf_markalldirty(RF_Raid_t *);
268 void rf_mountroot_hook(struct device *);
269
270 struct device *raidrootdev;
271
272 void rf_ReconThread(struct rf_recon_req *);
273 /* XXX what I want is: */
274 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
275 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
276 void rf_CopybackThread(RF_Raid_t *raidPtr);
277 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
278 void rf_buildroothack(void *);
279
280 RF_AutoConfig_t *rf_find_raid_components(void);
281 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
282 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
283 static int rf_reasonable_label(RF_ComponentLabel_t *);
284 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
285 int rf_set_autoconfig(RF_Raid_t *, int);
286 int rf_set_rootpartition(RF_Raid_t *, int);
287 void rf_release_all_vps(RF_ConfigSet_t *);
288 void rf_cleanup_config_set(RF_ConfigSet_t *);
289 int rf_have_enough_components(RF_ConfigSet_t *);
290 int rf_auto_config_set(RF_ConfigSet_t *, int *);
291
292 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
293 allow autoconfig to take place.
294 Note that this is overridden by having
295 RAID_AUTOCONFIG as an option in the
296 kernel config file. */
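/*
 * A kernel configuration file would typically enable autoconfiguration
 * with a line of the form
 *
 *	options 	RAID_AUTOCONFIG
 *
 * which is what the "opt_raid_autoconfig.h" header included above reflects.
 */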
297
298 void
299 raidattach(num)
300 int num;
301 {
302 int raidID;
303 int i, rc;
304 RF_AutoConfig_t *ac_list; /* autoconfig list */
305 RF_ConfigSet_t *config_sets;
306
307 #ifdef DEBUG
308 printf("raidattach: Asked for %d units\n", num);
309 #endif
310
311 if (num <= 0) {
312 #ifdef DIAGNOSTIC
313 panic("raidattach: count <= 0");
314 #endif
315 return;
316 }
317 /* This is where all the initialization stuff gets done. */
318
319 numraid = num;
320
321 /* Make some space for requested number of units... */
322
323 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
324 if (raidPtrs == NULL) {
325 panic("raidPtrs is NULL!!\n");
326 }
327
328 /* Initialize the component buffer pool. */
329 pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
330 0, 0, "raidpl", NULL);
331
332 rc = rf_mutex_init(&rf_sparet_wait_mutex);
333 if (rc) {
334 RF_PANIC();
335 }
336
337 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
338
339 for (i = 0; i < num; i++)
340 raidPtrs[i] = NULL;
341 rc = rf_BootRaidframe();
342 if (rc == 0)
343 printf("Kernelized RAIDframe activated\n");
344 else
345 panic("Serious error booting RAID!!\n");
346
    347 	/* put together some data structures like the CCD device does.. This
348 * lets us lock the device and what-not when it gets opened. */
349
350 raid_softc = (struct raid_softc *)
351 malloc(num * sizeof(struct raid_softc),
352 M_RAIDFRAME, M_NOWAIT);
353 if (raid_softc == NULL) {
354 printf("WARNING: no memory for RAIDframe driver\n");
355 return;
356 }
357
358 memset(raid_softc, 0, num * sizeof(struct raid_softc));
359
360 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
361 M_RAIDFRAME, M_NOWAIT);
362 if (raidrootdev == NULL) {
363 panic("No memory for RAIDframe driver!!?!?!\n");
364 }
365
366 for (raidID = 0; raidID < num; raidID++) {
367 bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
368
369 raidrootdev[raidID].dv_class = DV_DISK;
370 raidrootdev[raidID].dv_cfdata = NULL;
371 raidrootdev[raidID].dv_unit = raidID;
372 raidrootdev[raidID].dv_parent = NULL;
373 raidrootdev[raidID].dv_flags = 0;
374 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
375
376 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
377 (RF_Raid_t *));
378 if (raidPtrs[raidID] == NULL) {
379 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
380 numraid = raidID;
381 return;
382 }
383 }
384
385 #ifdef RAID_AUTOCONFIG
386 raidautoconfig = 1;
387 #endif
388
389 if (raidautoconfig) {
390 /* 1. locate all RAID components on the system */
391
392 #if DEBUG
393 printf("Searching for raid components...\n");
394 #endif
395 ac_list = rf_find_raid_components();
396
397 /* 2. sort them into their respective sets */
398
399 config_sets = rf_create_auto_sets(ac_list);
400
401 /* 3. evaluate each set and configure the valid ones
402 This gets done in rf_buildroothack() */
403
404 /* schedule the creation of the thread to do the
405 "/ on RAID" stuff */
406
407 kthread_create(rf_buildroothack,config_sets);
408
409 #if 0
410 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
411 #endif
412 }
413
414 }
415
416 void
417 rf_buildroothack(arg)
418 void *arg;
419 {
420 RF_ConfigSet_t *config_sets = arg;
421 RF_ConfigSet_t *cset;
422 RF_ConfigSet_t *next_cset;
423 int retcode;
424 int raidID;
425 int rootID;
426 int num_root;
427
428 rootID = 0;
429 num_root = 0;
430 cset = config_sets;
431 while(cset != NULL ) {
432 next_cset = cset->next;
433 if (rf_have_enough_components(cset) &&
434 cset->ac->clabel->autoconfigure==1) {
435 retcode = rf_auto_config_set(cset,&raidID);
436 if (!retcode) {
437 if (cset->rootable) {
438 rootID = raidID;
439 num_root++;
440 }
441 } else {
442 /* The autoconfig didn't work :( */
443 #if DEBUG
444 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
445 #endif
446 rf_release_all_vps(cset);
447 }
448 } else {
449 /* we're not autoconfiguring this set...
450 release the associated resources */
451 rf_release_all_vps(cset);
452 }
453 /* cleanup */
454 rf_cleanup_config_set(cset);
455 cset = next_cset;
456 }
457
458 /* we found something bootable... */
459
460 if (num_root == 1) {
461 booted_device = &raidrootdev[rootID];
462 } else if (num_root > 1) {
463 /* we can't guess.. require the user to answer... */
464 boothowto |= RB_ASKNAME;
465 }
466 }
467
468
469 int
470 raidsize(dev)
471 dev_t dev;
472 {
473 struct raid_softc *rs;
474 struct disklabel *lp;
475 int part, unit, omask, size;
476
477 unit = raidunit(dev);
478 if (unit >= numraid)
479 return (-1);
480 rs = &raid_softc[unit];
481
482 if ((rs->sc_flags & RAIDF_INITED) == 0)
483 return (-1);
484
485 part = DISKPART(dev);
486 omask = rs->sc_dkdev.dk_openmask & (1 << part);
487 lp = rs->sc_dkdev.dk_label;
488
489 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
490 return (-1);
491
492 if (lp->d_partitions[part].p_fstype != FS_SWAP)
493 size = -1;
494 else
495 size = lp->d_partitions[part].p_size *
496 (lp->d_secsize / DEV_BSIZE);
497
498 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
499 return (-1);
500
501 return (size);
502
503 }
504
505 int
506 raiddump(dev, blkno, va, size)
507 dev_t dev;
508 daddr_t blkno;
509 caddr_t va;
510 size_t size;
511 {
512 /* Not implemented. */
513 return ENXIO;
514 }
515 /* ARGSUSED */
516 int
517 raidopen(dev, flags, fmt, p)
518 dev_t dev;
519 int flags, fmt;
520 struct proc *p;
521 {
522 int unit = raidunit(dev);
523 struct raid_softc *rs;
524 struct disklabel *lp;
525 int part, pmask;
526 int error = 0;
527
528 if (unit >= numraid)
529 return (ENXIO);
530 rs = &raid_softc[unit];
531
532 if ((error = raidlock(rs)) != 0)
533 return (error);
534 lp = rs->sc_dkdev.dk_label;
535
536 part = DISKPART(dev);
537 pmask = (1 << part);
538
539 db1_printf(("Opening raid device number: %d partition: %d\n",
540 unit, part));
541
542
543 if ((rs->sc_flags & RAIDF_INITED) &&
544 (rs->sc_dkdev.dk_openmask == 0))
545 raidgetdisklabel(dev);
546
547 /* make sure that this partition exists */
548
549 if (part != RAW_PART) {
550 db1_printf(("Not a raw partition..\n"));
551 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
552 ((part >= lp->d_npartitions) ||
553 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
554 error = ENXIO;
555 raidunlock(rs);
556 db1_printf(("Bailing out...\n"));
557 return (error);
558 }
559 }
560 /* Prevent this unit from being unconfigured while open. */
561 switch (fmt) {
562 case S_IFCHR:
563 rs->sc_dkdev.dk_copenmask |= pmask;
564 break;
565
566 case S_IFBLK:
567 rs->sc_dkdev.dk_bopenmask |= pmask;
568 break;
569 }
570
571 if ((rs->sc_dkdev.dk_openmask == 0) &&
572 ((rs->sc_flags & RAIDF_INITED) != 0)) {
573 /* First one... mark things as dirty... Note that we *MUST*
574 have done a configure before this. I DO NOT WANT TO BE
575 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
576 THAT THEY BELONG TOGETHER!!!!! */
577 /* XXX should check to see if we're only open for reading
578 here... If so, we needn't do this, but then need some
579 other way of keeping track of what's happened.. */
580
581 rf_markalldirty( raidPtrs[unit] );
582 }
583
584
585 rs->sc_dkdev.dk_openmask =
586 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
587
588 raidunlock(rs);
589
590 return (error);
591
592
593 }
594 /* ARGSUSED */
595 int
596 raidclose(dev, flags, fmt, p)
597 dev_t dev;
598 int flags, fmt;
599 struct proc *p;
600 {
601 int unit = raidunit(dev);
602 struct raid_softc *rs;
603 int error = 0;
604 int part;
605
606 if (unit >= numraid)
607 return (ENXIO);
608 rs = &raid_softc[unit];
609
610 if ((error = raidlock(rs)) != 0)
611 return (error);
612
613 part = DISKPART(dev);
614
615 /* ...that much closer to allowing unconfiguration... */
616 switch (fmt) {
617 case S_IFCHR:
618 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
619 break;
620
621 case S_IFBLK:
622 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
623 break;
624 }
625 rs->sc_dkdev.dk_openmask =
626 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
627
628 if ((rs->sc_dkdev.dk_openmask == 0) &&
629 ((rs->sc_flags & RAIDF_INITED) != 0)) {
    630 		/* Last one... device is not unconfigured yet.
    631 		   If RAIDF_INITED is not set, device shutdown has already
    632 		   taken care of setting the clean bits; otherwise, mark
    633 		   things as clean here... */
634 #if 0
635 printf("Last one on raid%d. Updating status.\n",unit);
636 #endif
637 rf_update_component_labels(raidPtrs[unit],
638 RF_FINAL_COMPONENT_UPDATE);
639 if (doing_shutdown) {
640 /* last one, and we're going down, so
641 lights out for this RAID set too. */
642 error = rf_Shutdown(raidPtrs[unit]);
643
644 /* It's no longer initialized... */
645 rs->sc_flags &= ~RAIDF_INITED;
646
647 /* Detach the disk. */
648 disk_detach(&rs->sc_dkdev);
649 }
650 }
651
652 raidunlock(rs);
653 return (0);
654
655 }
656
657 void
658 raidstrategy(bp)
659 struct buf *bp;
660 {
661 int s;
662
663 unsigned int raidID = raidunit(bp->b_dev);
664 RF_Raid_t *raidPtr;
665 struct raid_softc *rs = &raid_softc[raidID];
666 struct disklabel *lp;
667 int wlabel;
668
669 if ((rs->sc_flags & RAIDF_INITED) ==0) {
670 bp->b_error = ENXIO;
671 bp->b_flags |= B_ERROR;
672 bp->b_resid = bp->b_bcount;
673 biodone(bp);
674 return;
675 }
676 if (raidID >= numraid || !raidPtrs[raidID]) {
677 bp->b_error = ENODEV;
678 bp->b_flags |= B_ERROR;
679 bp->b_resid = bp->b_bcount;
680 biodone(bp);
681 return;
682 }
683 raidPtr = raidPtrs[raidID];
684 if (!raidPtr->valid) {
685 bp->b_error = ENODEV;
686 bp->b_flags |= B_ERROR;
687 bp->b_resid = bp->b_bcount;
688 biodone(bp);
689 return;
690 }
691 if (bp->b_bcount == 0) {
692 db1_printf(("b_bcount is zero..\n"));
693 biodone(bp);
694 return;
695 }
696 lp = rs->sc_dkdev.dk_label;
697
698 /*
699 * Do bounds checking and adjust transfer. If there's an
700 * error, the bounds check will flag that for us.
701 */
702
703 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
704 if (DISKPART(bp->b_dev) != RAW_PART)
705 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
706 db1_printf(("Bounds check failed!!:%d %d\n",
707 (int) bp->b_blkno, (int) wlabel));
708 biodone(bp);
709 return;
710 }
711 s = splbio();
712
713 bp->b_resid = 0;
714
715 /* stuff it onto our queue */
716 BUFQ_PUT(&rs->buf_queue, bp);
717
718 raidstart(raidPtrs[raidID]);
719
720 splx(s);
721 }
722 /* ARGSUSED */
723 int
724 raidread(dev, uio, flags)
725 dev_t dev;
726 struct uio *uio;
727 int flags;
728 {
729 int unit = raidunit(dev);
730 struct raid_softc *rs;
731 int part;
732
733 if (unit >= numraid)
734 return (ENXIO);
735 rs = &raid_softc[unit];
736
737 if ((rs->sc_flags & RAIDF_INITED) == 0)
738 return (ENXIO);
739 part = DISKPART(dev);
740
741 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
742
743 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
744
745 }
746 /* ARGSUSED */
747 int
748 raidwrite(dev, uio, flags)
749 dev_t dev;
750 struct uio *uio;
751 int flags;
752 {
753 int unit = raidunit(dev);
754 struct raid_softc *rs;
755
756 if (unit >= numraid)
757 return (ENXIO);
758 rs = &raid_softc[unit];
759
760 if ((rs->sc_flags & RAIDF_INITED) == 0)
761 return (ENXIO);
762 db1_printf(("raidwrite\n"));
763 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
764
765 }
766
767 int
768 raidioctl(dev, cmd, data, flag, p)
769 dev_t dev;
770 u_long cmd;
771 caddr_t data;
772 int flag;
773 struct proc *p;
774 {
775 int unit = raidunit(dev);
776 int error = 0;
777 int part, pmask;
778 struct raid_softc *rs;
779 RF_Config_t *k_cfg, *u_cfg;
780 RF_Raid_t *raidPtr;
781 RF_RaidDisk_t *diskPtr;
782 RF_AccTotals_t *totals;
783 RF_DeviceConfig_t *d_cfg, **ucfgp;
784 u_char *specific_buf;
785 int retcode = 0;
786 int row;
787 int column;
788 int raidid;
789 struct rf_recon_req *rrcopy, *rr;
790 RF_ComponentLabel_t *clabel;
791 RF_ComponentLabel_t ci_label;
792 RF_ComponentLabel_t **clabel_ptr;
793 RF_SingleComponent_t *sparePtr,*componentPtr;
794 RF_SingleComponent_t hot_spare;
795 RF_SingleComponent_t component;
796 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
797 int i, j, d;
798 #ifdef __HAVE_OLD_DISKLABEL
799 struct disklabel newlabel;
800 #endif
801
802 if (unit >= numraid)
803 return (ENXIO);
804 rs = &raid_softc[unit];
805 raidPtr = raidPtrs[unit];
806
807 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
808 (int) DISKPART(dev), (int) unit, (int) cmd));
809
810 /* Must be open for writes for these commands... */
811 switch (cmd) {
812 case DIOCSDINFO:
813 case DIOCWDINFO:
814 #ifdef __HAVE_OLD_DISKLABEL
815 case ODIOCWDINFO:
816 case ODIOCSDINFO:
817 #endif
818 case DIOCWLABEL:
819 if ((flag & FWRITE) == 0)
820 return (EBADF);
821 }
822
823 /* Must be initialized for these... */
824 switch (cmd) {
825 case DIOCGDINFO:
826 case DIOCSDINFO:
827 case DIOCWDINFO:
828 #ifdef __HAVE_OLD_DISKLABEL
829 case ODIOCGDINFO:
830 case ODIOCWDINFO:
831 case ODIOCSDINFO:
832 case ODIOCGDEFLABEL:
833 #endif
834 case DIOCGPART:
835 case DIOCWLABEL:
836 case DIOCGDEFLABEL:
837 case RAIDFRAME_SHUTDOWN:
838 case RAIDFRAME_REWRITEPARITY:
839 case RAIDFRAME_GET_INFO:
840 case RAIDFRAME_RESET_ACCTOTALS:
841 case RAIDFRAME_GET_ACCTOTALS:
842 case RAIDFRAME_KEEP_ACCTOTALS:
843 case RAIDFRAME_GET_SIZE:
844 case RAIDFRAME_FAIL_DISK:
845 case RAIDFRAME_COPYBACK:
846 case RAIDFRAME_CHECK_RECON_STATUS:
847 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
848 case RAIDFRAME_GET_COMPONENT_LABEL:
849 case RAIDFRAME_SET_COMPONENT_LABEL:
850 case RAIDFRAME_ADD_HOT_SPARE:
851 case RAIDFRAME_REMOVE_HOT_SPARE:
852 case RAIDFRAME_INIT_LABELS:
853 case RAIDFRAME_REBUILD_IN_PLACE:
854 case RAIDFRAME_CHECK_PARITY:
855 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
856 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
857 case RAIDFRAME_CHECK_COPYBACK_STATUS:
858 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
859 case RAIDFRAME_SET_AUTOCONFIG:
860 case RAIDFRAME_SET_ROOT:
861 case RAIDFRAME_DELETE_COMPONENT:
862 case RAIDFRAME_INCORPORATE_HOT_SPARE:
863 if ((rs->sc_flags & RAIDF_INITED) == 0)
864 return (ENXIO);
865 }
866
867 switch (cmd) {
868
869 /* configure the system */
870 case RAIDFRAME_CONFIGURE:
871
872 if (raidPtr->valid) {
873 /* There is a valid RAID set running on this unit! */
874 printf("raid%d: Device already configured!\n",unit);
875 return(EINVAL);
876 }
877
878 /* copy-in the configuration information */
879 /* data points to a pointer to the configuration structure */
880
881 u_cfg = *((RF_Config_t **) data);
882 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
883 if (k_cfg == NULL) {
884 return (ENOMEM);
885 }
886 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
887 sizeof(RF_Config_t));
888 if (retcode) {
889 RF_Free(k_cfg, sizeof(RF_Config_t));
890 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
891 retcode));
892 return (retcode);
893 }
894 /* allocate a buffer for the layout-specific data, and copy it
895 * in */
896 if (k_cfg->layoutSpecificSize) {
897 if (k_cfg->layoutSpecificSize > 10000) {
898 /* sanity check */
899 RF_Free(k_cfg, sizeof(RF_Config_t));
900 return (EINVAL);
901 }
902 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
903 (u_char *));
904 if (specific_buf == NULL) {
905 RF_Free(k_cfg, sizeof(RF_Config_t));
906 return (ENOMEM);
907 }
908 retcode = copyin(k_cfg->layoutSpecific,
909 (caddr_t) specific_buf,
910 k_cfg->layoutSpecificSize);
911 if (retcode) {
912 RF_Free(k_cfg, sizeof(RF_Config_t));
913 RF_Free(specific_buf,
914 k_cfg->layoutSpecificSize);
915 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
916 retcode));
917 return (retcode);
918 }
919 } else
920 specific_buf = NULL;
921 k_cfg->layoutSpecific = specific_buf;
922
923 /* should do some kind of sanity check on the configuration.
924 * Store the sum of all the bytes in the last byte? */
925
926 /* configure the system */
927
928 /*
929 * Clear the entire RAID descriptor, just to make sure
930 * there is no stale data left in the case of a
931 * reconfiguration
932 */
933 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
934 raidPtr->raidid = unit;
935
936 retcode = rf_Configure(raidPtr, k_cfg, NULL);
937
938 if (retcode == 0) {
939
940 /* allow this many simultaneous IO's to
941 this RAID device */
942 raidPtr->openings = RAIDOUTSTANDING;
943
944 raidinit(raidPtr);
945 rf_markalldirty(raidPtr);
946 }
947 /* free the buffers. No return code here. */
948 if (k_cfg->layoutSpecificSize) {
949 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
950 }
951 RF_Free(k_cfg, sizeof(RF_Config_t));
952
953 return (retcode);
954
955 /* shutdown the system */
956 case RAIDFRAME_SHUTDOWN:
957
958 if ((error = raidlock(rs)) != 0)
959 return (error);
960
961 /*
962 * If somebody has a partition mounted, we shouldn't
963 * shutdown.
964 */
965
966 part = DISKPART(dev);
967 pmask = (1 << part);
968 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
969 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
970 (rs->sc_dkdev.dk_copenmask & pmask))) {
971 raidunlock(rs);
972 return (EBUSY);
973 }
974
975 retcode = rf_Shutdown(raidPtr);
976
977 /* It's no longer initialized... */
978 rs->sc_flags &= ~RAIDF_INITED;
979
980 /* Detach the disk. */
981 disk_detach(&rs->sc_dkdev);
982
983 raidunlock(rs);
984
985 return (retcode);
986 case RAIDFRAME_GET_COMPONENT_LABEL:
987 clabel_ptr = (RF_ComponentLabel_t **) data;
988 /* need to read the component label for the disk indicated
989 by row,column in clabel */
990
    991 		/* For practice, let's get it directly from disk, rather
992 than from the in-core copy */
993 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
994 (RF_ComponentLabel_t *));
995 if (clabel == NULL)
996 return (ENOMEM);
997
998 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
999
1000 retcode = copyin( *clabel_ptr, clabel,
1001 sizeof(RF_ComponentLabel_t));
1002
1003 if (retcode) {
1004 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1005 return(retcode);
1006 }
1007
1008 row = clabel->row;
1009 column = clabel->column;
1010
1011 if ((row < 0) || (row >= raidPtr->numRow) ||
1012 (column < 0) || (column >= raidPtr->numCol +
1013 raidPtr->numSpare)) {
1014 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1015 return(EINVAL);
1016 }
1017
1018 raidread_component_label(raidPtr->Disks[row][column].dev,
1019 raidPtr->raid_cinfo[row][column].ci_vp,
1020 clabel );
1021
1022 retcode = copyout((caddr_t) clabel,
1023 (caddr_t) *clabel_ptr,
1024 sizeof(RF_ComponentLabel_t));
1025 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1026 return (retcode);
1027
1028 case RAIDFRAME_SET_COMPONENT_LABEL:
1029 clabel = (RF_ComponentLabel_t *) data;
1030
1031 /* XXX check the label for valid stuff... */
1032 /* Note that some things *should not* get modified --
1033 the user should be re-initing the labels instead of
1034 trying to patch things.
1035 */
1036
1037 raidid = raidPtr->raidid;
1038 printf("raid%d: Got component label:\n", raidid);
1039 printf("raid%d: Version: %d\n", raidid, clabel->version);
1040 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1041 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1042 printf("raid%d: Row: %d\n", raidid, clabel->row);
1043 printf("raid%d: Column: %d\n", raidid, clabel->column);
1044 printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
1045 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1046 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1047 printf("raid%d: Status: %d\n", raidid, clabel->status);
1048
1049 row = clabel->row;
1050 column = clabel->column;
1051
1052 if ((row < 0) || (row >= raidPtr->numRow) ||
1053 (column < 0) || (column >= raidPtr->numCol)) {
1054 return(EINVAL);
1055 }
1056
1057 /* XXX this isn't allowed to do anything for now :-) */
1058
1059 /* XXX and before it is, we need to fill in the rest
1060 of the fields!?!?!?! */
1061 #if 0
1062 raidwrite_component_label(
1063 raidPtr->Disks[row][column].dev,
1064 raidPtr->raid_cinfo[row][column].ci_vp,
1065 clabel );
1066 #endif
1067 return (0);
1068
1069 case RAIDFRAME_INIT_LABELS:
1070 clabel = (RF_ComponentLabel_t *) data;
1071 /*
1072 we only want the serial number from
1073 the above. We get all the rest of the information
1074 from the config that was used to create this RAID
1075 set.
1076 */
1077
1078 raidPtr->serial_number = clabel->serial_number;
1079
1080 raid_init_component_label(raidPtr, &ci_label);
1081 ci_label.serial_number = clabel->serial_number;
1082
1083 for(row=0;row<raidPtr->numRow;row++) {
1084 ci_label.row = row;
1085 for(column=0;column<raidPtr->numCol;column++) {
1086 diskPtr = &raidPtr->Disks[row][column];
1087 if (!RF_DEAD_DISK(diskPtr->status)) {
1088 ci_label.partitionSize = diskPtr->partitionSize;
1089 ci_label.column = column;
1090 raidwrite_component_label(
1091 raidPtr->Disks[row][column].dev,
1092 raidPtr->raid_cinfo[row][column].ci_vp,
1093 &ci_label );
1094 }
1095 }
1096 }
1097
1098 return (retcode);
1099 case RAIDFRAME_SET_AUTOCONFIG:
1100 d = rf_set_autoconfig(raidPtr, *(int *) data);
1101 printf("raid%d: New autoconfig value is: %d\n",
1102 raidPtr->raidid, d);
1103 *(int *) data = d;
1104 return (retcode);
1105
1106 case RAIDFRAME_SET_ROOT:
1107 d = rf_set_rootpartition(raidPtr, *(int *) data);
1108 printf("raid%d: New rootpartition value is: %d\n",
1109 raidPtr->raidid, d);
1110 *(int *) data = d;
1111 return (retcode);
1112
1113 /* initialize all parity */
1114 case RAIDFRAME_REWRITEPARITY:
1115
1116 if (raidPtr->Layout.map->faultsTolerated == 0) {
1117 /* Parity for RAID 0 is trivially correct */
1118 raidPtr->parity_good = RF_RAID_CLEAN;
1119 return(0);
1120 }
1121
1122 if (raidPtr->parity_rewrite_in_progress == 1) {
1123 /* Re-write is already in progress! */
1124 return(EINVAL);
1125 }
1126
1127 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1128 rf_RewriteParityThread,
1129 raidPtr,"raid_parity");
1130 return (retcode);
1131
1132
1133 case RAIDFRAME_ADD_HOT_SPARE:
1134 sparePtr = (RF_SingleComponent_t *) data;
1135 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1136 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1137 return(retcode);
1138
1139 case RAIDFRAME_REMOVE_HOT_SPARE:
1140 return(retcode);
1141
1142 case RAIDFRAME_DELETE_COMPONENT:
1143 componentPtr = (RF_SingleComponent_t *)data;
1144 memcpy( &component, componentPtr,
1145 sizeof(RF_SingleComponent_t));
1146 retcode = rf_delete_component(raidPtr, &component);
1147 return(retcode);
1148
1149 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1150 componentPtr = (RF_SingleComponent_t *)data;
1151 memcpy( &component, componentPtr,
1152 sizeof(RF_SingleComponent_t));
1153 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1154 return(retcode);
1155
1156 case RAIDFRAME_REBUILD_IN_PLACE:
1157
1158 if (raidPtr->Layout.map->faultsTolerated == 0) {
1159 /* Can't do this on a RAID 0!! */
1160 return(EINVAL);
1161 }
1162
1163 if (raidPtr->recon_in_progress == 1) {
1164 /* a reconstruct is already in progress! */
1165 return(EINVAL);
1166 }
1167
1168 componentPtr = (RF_SingleComponent_t *) data;
1169 memcpy( &component, componentPtr,
1170 sizeof(RF_SingleComponent_t));
1171 row = component.row;
1172 column = component.column;
1173 printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
1174 row, column);
1175 if ((row < 0) || (row >= raidPtr->numRow) ||
1176 (column < 0) || (column >= raidPtr->numCol)) {
1177 return(EINVAL);
1178 }
1179
1180 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1181 if (rrcopy == NULL)
1182 return(ENOMEM);
1183
1184 rrcopy->raidPtr = (void *) raidPtr;
1185 rrcopy->row = row;
1186 rrcopy->col = column;
1187
1188 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1189 rf_ReconstructInPlaceThread,
1190 rrcopy,"raid_reconip");
1191 return(retcode);
1192
1193 case RAIDFRAME_GET_INFO:
1194 if (!raidPtr->valid)
1195 return (ENODEV);
1196 ucfgp = (RF_DeviceConfig_t **) data;
1197 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1198 (RF_DeviceConfig_t *));
1199 if (d_cfg == NULL)
1200 return (ENOMEM);
1201 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1202 d_cfg->rows = raidPtr->numRow;
1203 d_cfg->cols = raidPtr->numCol;
1204 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1205 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1206 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1207 return (ENOMEM);
1208 }
1209 d_cfg->nspares = raidPtr->numSpare;
1210 if (d_cfg->nspares >= RF_MAX_DISKS) {
1211 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1212 return (ENOMEM);
1213 }
1214 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1215 d = 0;
1216 for (i = 0; i < d_cfg->rows; i++) {
1217 for (j = 0; j < d_cfg->cols; j++) {
1218 d_cfg->devs[d] = raidPtr->Disks[i][j];
1219 d++;
1220 }
1221 }
1222 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1223 d_cfg->spares[i] = raidPtr->Disks[0][j];
1224 }
1225 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1226 sizeof(RF_DeviceConfig_t));
1227 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1228
1229 return (retcode);
1230
1231 case RAIDFRAME_CHECK_PARITY:
1232 *(int *) data = raidPtr->parity_good;
1233 return (0);
1234
1235 case RAIDFRAME_RESET_ACCTOTALS:
1236 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1237 return (0);
1238
1239 case RAIDFRAME_GET_ACCTOTALS:
1240 totals = (RF_AccTotals_t *) data;
1241 *totals = raidPtr->acc_totals;
1242 return (0);
1243
1244 case RAIDFRAME_KEEP_ACCTOTALS:
1245 raidPtr->keep_acc_totals = *(int *)data;
1246 return (0);
1247
1248 case RAIDFRAME_GET_SIZE:
1249 *(int *) data = raidPtr->totalSectors;
1250 return (0);
1251
1252 /* fail a disk & optionally start reconstruction */
1253 case RAIDFRAME_FAIL_DISK:
1254
1255 if (raidPtr->Layout.map->faultsTolerated == 0) {
1256 /* Can't do this on a RAID 0!! */
1257 return(EINVAL);
1258 }
1259
1260 rr = (struct rf_recon_req *) data;
1261
1262 if (rr->row < 0 || rr->row >= raidPtr->numRow
1263 || rr->col < 0 || rr->col >= raidPtr->numCol)
1264 return (EINVAL);
1265
1266 printf("raid%d: Failing the disk: row: %d col: %d\n",
1267 unit, rr->row, rr->col);
1268
1269 /* make a copy of the recon request so that we don't rely on
1270 * the user's buffer */
1271 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1272 if (rrcopy == NULL)
1273 return(ENOMEM);
1274 memcpy(rrcopy, rr, sizeof(*rr));
1275 rrcopy->raidPtr = (void *) raidPtr;
1276
1277 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1278 rf_ReconThread,
1279 rrcopy,"raid_recon");
1280 return (0);
1281
1282 /* invoke a copyback operation after recon on whatever disk
1283 * needs it, if any */
1284 case RAIDFRAME_COPYBACK:
1285
1286 if (raidPtr->Layout.map->faultsTolerated == 0) {
1287 /* This makes no sense on a RAID 0!! */
1288 return(EINVAL);
1289 }
1290
1291 if (raidPtr->copyback_in_progress == 1) {
1292 /* Copyback is already in progress! */
1293 return(EINVAL);
1294 }
1295
1296 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1297 rf_CopybackThread,
1298 raidPtr,"raid_copyback");
1299 return (retcode);
1300
1301 /* return the percentage completion of reconstruction */
1302 case RAIDFRAME_CHECK_RECON_STATUS:
1303 if (raidPtr->Layout.map->faultsTolerated == 0) {
1304 /* This makes no sense on a RAID 0, so tell the
1305 user it's done. */
1306 *(int *) data = 100;
1307 return(0);
1308 }
1309 row = 0; /* XXX we only consider a single row... */
1310 if (raidPtr->status[row] != rf_rs_reconstructing)
1311 *(int *) data = 100;
1312 else
1313 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1314 return (0);
1315 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1316 progressInfoPtr = (RF_ProgressInfo_t **) data;
1317 row = 0; /* XXX we only consider a single row... */
1318 if (raidPtr->status[row] != rf_rs_reconstructing) {
1319 progressInfo.remaining = 0;
1320 progressInfo.completed = 100;
1321 progressInfo.total = 100;
1322 } else {
1323 progressInfo.total =
1324 raidPtr->reconControl[row]->numRUsTotal;
1325 progressInfo.completed =
1326 raidPtr->reconControl[row]->numRUsComplete;
1327 progressInfo.remaining = progressInfo.total -
1328 progressInfo.completed;
1329 }
1330 retcode = copyout((caddr_t) &progressInfo,
1331 (caddr_t) *progressInfoPtr,
1332 sizeof(RF_ProgressInfo_t));
1333 return (retcode);
1334
1335 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1336 if (raidPtr->Layout.map->faultsTolerated == 0) {
1337 /* This makes no sense on a RAID 0, so tell the
1338 user it's done. */
1339 *(int *) data = 100;
1340 return(0);
1341 }
1342 if (raidPtr->parity_rewrite_in_progress == 1) {
1343 *(int *) data = 100 *
1344 raidPtr->parity_rewrite_stripes_done /
1345 raidPtr->Layout.numStripe;
1346 } else {
1347 *(int *) data = 100;
1348 }
1349 return (0);
1350
1351 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1352 progressInfoPtr = (RF_ProgressInfo_t **) data;
1353 if (raidPtr->parity_rewrite_in_progress == 1) {
1354 progressInfo.total = raidPtr->Layout.numStripe;
1355 progressInfo.completed =
1356 raidPtr->parity_rewrite_stripes_done;
1357 progressInfo.remaining = progressInfo.total -
1358 progressInfo.completed;
1359 } else {
1360 progressInfo.remaining = 0;
1361 progressInfo.completed = 100;
1362 progressInfo.total = 100;
1363 }
1364 retcode = copyout((caddr_t) &progressInfo,
1365 (caddr_t) *progressInfoPtr,
1366 sizeof(RF_ProgressInfo_t));
1367 return (retcode);
1368
1369 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1370 if (raidPtr->Layout.map->faultsTolerated == 0) {
1371 /* This makes no sense on a RAID 0 */
1372 *(int *) data = 100;
1373 return(0);
1374 }
1375 if (raidPtr->copyback_in_progress == 1) {
1376 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1377 raidPtr->Layout.numStripe;
1378 } else {
1379 *(int *) data = 100;
1380 }
1381 return (0);
1382
1383 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1384 progressInfoPtr = (RF_ProgressInfo_t **) data;
1385 if (raidPtr->copyback_in_progress == 1) {
1386 progressInfo.total = raidPtr->Layout.numStripe;
1387 progressInfo.completed =
1388 raidPtr->copyback_stripes_done;
1389 progressInfo.remaining = progressInfo.total -
1390 progressInfo.completed;
1391 } else {
1392 progressInfo.remaining = 0;
1393 progressInfo.completed = 100;
1394 progressInfo.total = 100;
1395 }
1396 retcode = copyout((caddr_t) &progressInfo,
1397 (caddr_t) *progressInfoPtr,
1398 sizeof(RF_ProgressInfo_t));
1399 return (retcode);
1400
1401 /* the sparetable daemon calls this to wait for the kernel to
1402 * need a spare table. this ioctl does not return until a
1403 * spare table is needed. XXX -- calling mpsleep here in the
1404 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1405 * -- I should either compute the spare table in the kernel,
1406 * or have a different -- XXX XXX -- interface (a different
1407 * character device) for delivering the table -- XXX */
1408 #if 0
1409 case RAIDFRAME_SPARET_WAIT:
1410 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1411 while (!rf_sparet_wait_queue)
1412 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1413 waitreq = rf_sparet_wait_queue;
1414 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1415 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1416
1417 /* structure assignment */
1418 *((RF_SparetWait_t *) data) = *waitreq;
1419
1420 RF_Free(waitreq, sizeof(*waitreq));
1421 return (0);
1422
1423 /* wakes up a process waiting on SPARET_WAIT and puts an error
   1424 	 * code in it that will cause the daemon to exit */
1425 case RAIDFRAME_ABORT_SPARET_WAIT:
1426 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1427 waitreq->fcol = -1;
1428 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1429 waitreq->next = rf_sparet_wait_queue;
1430 rf_sparet_wait_queue = waitreq;
1431 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1432 wakeup(&rf_sparet_wait_queue);
1433 return (0);
1434
1435 /* used by the spare table daemon to deliver a spare table
1436 * into the kernel */
1437 case RAIDFRAME_SEND_SPARET:
1438
1439 /* install the spare table */
1440 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1441
1442 /* respond to the requestor. the return status of the spare
1443 * table installation is passed in the "fcol" field */
1444 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1445 waitreq->fcol = retcode;
1446 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1447 waitreq->next = rf_sparet_resp_queue;
1448 rf_sparet_resp_queue = waitreq;
1449 wakeup(&rf_sparet_resp_queue);
1450 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1451
1452 return (retcode);
1453 #endif
1454
1455 default:
1456 break; /* fall through to the os-specific code below */
1457
1458 }
1459
1460 if (!raidPtr->valid)
1461 return (EINVAL);
1462
1463 /*
1464 * Add support for "regular" device ioctls here.
1465 */
1466
1467 switch (cmd) {
1468 case DIOCGDINFO:
1469 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1470 break;
1471 #ifdef __HAVE_OLD_DISKLABEL
1472 case ODIOCGDINFO:
1473 newlabel = *(rs->sc_dkdev.dk_label);
1474 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1475 return ENOTTY;
1476 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1477 break;
1478 #endif
1479
1480 case DIOCGPART:
1481 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1482 ((struct partinfo *) data)->part =
1483 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1484 break;
1485
1486 case DIOCWDINFO:
1487 case DIOCSDINFO:
1488 #ifdef __HAVE_OLD_DISKLABEL
1489 case ODIOCWDINFO:
1490 case ODIOCSDINFO:
1491 #endif
1492 {
1493 struct disklabel *lp;
1494 #ifdef __HAVE_OLD_DISKLABEL
1495 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1496 memset(&newlabel, 0, sizeof newlabel);
1497 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1498 lp = &newlabel;
1499 } else
1500 #endif
1501 lp = (struct disklabel *)data;
1502
1503 if ((error = raidlock(rs)) != 0)
1504 return (error);
1505
1506 rs->sc_flags |= RAIDF_LABELLING;
1507
1508 error = setdisklabel(rs->sc_dkdev.dk_label,
1509 lp, 0, rs->sc_dkdev.dk_cpulabel);
1510 if (error == 0) {
1511 if (cmd == DIOCWDINFO
1512 #ifdef __HAVE_OLD_DISKLABEL
1513 || cmd == ODIOCWDINFO
1514 #endif
1515 )
1516 error = writedisklabel(RAIDLABELDEV(dev),
1517 raidstrategy, rs->sc_dkdev.dk_label,
1518 rs->sc_dkdev.dk_cpulabel);
1519 }
1520 rs->sc_flags &= ~RAIDF_LABELLING;
1521
1522 raidunlock(rs);
1523
1524 if (error)
1525 return (error);
1526 break;
1527 }
1528
1529 case DIOCWLABEL:
1530 if (*(int *) data != 0)
1531 rs->sc_flags |= RAIDF_WLABEL;
1532 else
1533 rs->sc_flags &= ~RAIDF_WLABEL;
1534 break;
1535
1536 case DIOCGDEFLABEL:
1537 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1538 break;
1539
1540 #ifdef __HAVE_OLD_DISKLABEL
1541 case ODIOCGDEFLABEL:
1542 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1543 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1544 return ENOTTY;
1545 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1546 break;
1547 #endif
1548
1549 default:
1550 retcode = ENOTTY;
1551 }
1552 return (retcode);
1553
1554 }
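/*
 * Illustrative userland sketch (not part of this driver): the status
 * ioctls handled above are issued against the raid device's raw partition,
 * roughly as below.  The device path is an example only, and the helper
 * name is hypothetical.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <dev/raidframe/raidframeio.h>

static int
raid_recon_percent(const char *rawdev)	/* e.g. "/dev/rraid0d" */
{
	int fd, percent = -1;

	if ((fd = open(rawdev, O_RDONLY)) >= 0) {
		if (ioctl(fd, RAIDFRAME_CHECK_RECON_STATUS, &percent) != 0)
			percent = -1;
		close(fd);
	}
	return (percent);
}
#endif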
1555
1556
1557 /* raidinit -- complete the rest of the initialization for the
1558 RAIDframe device. */
1559
1560
1561 static void
1562 raidinit(raidPtr)
1563 RF_Raid_t *raidPtr;
1564 {
1565 struct raid_softc *rs;
1566 int unit;
1567
1568 unit = raidPtr->raidid;
1569
1570 rs = &raid_softc[unit];
1571
1572 /* XXX should check return code first... */
1573 rs->sc_flags |= RAIDF_INITED;
1574
1575 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1576
1577 rs->sc_dkdev.dk_name = rs->sc_xname;
1578
1579 /* disk_attach actually creates space for the CPU disklabel, among
1580 * other things, so it's critical to call this *BEFORE* we try putzing
1581 * with disklabels. */
1582
1583 disk_attach(&rs->sc_dkdev);
1584
1585 /* XXX There may be a weird interaction here between this, and
1586 * protectedSectors, as used in RAIDframe. */
1587
1588 rs->sc_size = raidPtr->totalSectors;
1589
1590 }
1591
1592 /* wake up the daemon & tell it to get us a spare table
1593 * XXX
1594 * the entries in the queues should be tagged with the raidPtr
1595 * so that in the extremely rare case that two recons happen at once,
   1596  * we know for which device we're requesting a spare table
1597 * XXX
1598 *
1599 * XXX This code is not currently used. GO
1600 */
1601 int
1602 rf_GetSpareTableFromDaemon(req)
1603 RF_SparetWait_t *req;
1604 {
1605 int retcode;
1606
1607 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1608 req->next = rf_sparet_wait_queue;
1609 rf_sparet_wait_queue = req;
1610 wakeup(&rf_sparet_wait_queue);
1611
1612 /* mpsleep unlocks the mutex */
1613 while (!rf_sparet_resp_queue) {
1614 tsleep(&rf_sparet_resp_queue, PRIBIO,
1615 "raidframe getsparetable", 0);
1616 }
1617 req = rf_sparet_resp_queue;
1618 rf_sparet_resp_queue = req->next;
1619 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1620
1621 retcode = req->fcol;
1622 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1623 * alloc'd */
1624 return (retcode);
1625 }
1626
1627 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1628 * bp & passes it down.
   1629  * any calls originating in the kernel must use non-blocking I/O;
   1630  * we do some extra sanity checking to return "appropriate" error values for
1631 * certain conditions (to make some standard utilities work)
1632 *
1633 * Formerly known as: rf_DoAccessKernel
1634 */
1635 void
1636 raidstart(raidPtr)
1637 RF_Raid_t *raidPtr;
1638 {
1639 RF_SectorCount_t num_blocks, pb, sum;
1640 RF_RaidAddr_t raid_addr;
1641 int retcode;
1642 struct partition *pp;
1643 daddr_t blocknum;
1644 int unit;
1645 struct raid_softc *rs;
1646 int do_async;
1647 struct buf *bp;
1648
1649 unit = raidPtr->raidid;
1650 rs = &raid_softc[unit];
1651
1652 /* quick check to see if anything has died recently */
1653 RF_LOCK_MUTEX(raidPtr->mutex);
1654 if (raidPtr->numNewFailures > 0) {
1655 rf_update_component_labels(raidPtr,
1656 RF_NORMAL_COMPONENT_UPDATE);
1657 raidPtr->numNewFailures--;
1658 }
1659
1660 /* Check to see if we're at the limit... */
1661 while (raidPtr->openings > 0) {
1662 RF_UNLOCK_MUTEX(raidPtr->mutex);
1663
1664 /* get the next item, if any, from the queue */
1665 if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
1666 /* nothing more to do */
1667 return;
1668 }
1669
1670 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1671 * partition.. Need to make it absolute to the underlying
1672 * device.. */
1673
1674 blocknum = bp->b_blkno;
1675 if (DISKPART(bp->b_dev) != RAW_PART) {
1676 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1677 blocknum += pp->p_offset;
1678 }
1679
1680 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1681 (int) blocknum));
1682
1683 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1684 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1685
1686 /* *THIS* is where we adjust what block we're going to...
1687 * but DO NOT TOUCH bp->b_blkno!!! */
1688 raid_addr = blocknum;
1689
1690 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1691 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1692 sum = raid_addr + num_blocks + pb;
1693 if (1 || rf_debugKernelAccess) {
1694 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1695 (int) raid_addr, (int) sum, (int) num_blocks,
1696 (int) pb, (int) bp->b_resid));
1697 }
1698 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1699 || (sum < num_blocks) || (sum < pb)) {
1700 bp->b_error = ENOSPC;
1701 bp->b_flags |= B_ERROR;
1702 bp->b_resid = bp->b_bcount;
1703 biodone(bp);
1704 RF_LOCK_MUTEX(raidPtr->mutex);
1705 continue;
1706 }
1707 /*
1708 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1709 */
1710
1711 if (bp->b_bcount & raidPtr->sectorMask) {
1712 bp->b_error = EINVAL;
1713 bp->b_flags |= B_ERROR;
1714 bp->b_resid = bp->b_bcount;
1715 biodone(bp);
1716 RF_LOCK_MUTEX(raidPtr->mutex);
1717 continue;
1718
1719 }
1720 db1_printf(("Calling DoAccess..\n"));
1721
1722
1723 RF_LOCK_MUTEX(raidPtr->mutex);
1724 raidPtr->openings--;
1725 RF_UNLOCK_MUTEX(raidPtr->mutex);
1726
1727 /*
1728 * Everything is async.
1729 */
1730 do_async = 1;
1731
1732 disk_busy(&rs->sc_dkdev);
1733
1734 /* XXX we're still at splbio() here... do we *really*
1735 need to be? */
1736
1737 /* don't ever condition on bp->b_flags & B_WRITE.
1738 * always condition on B_READ instead */
1739
1740 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1741 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1742 do_async, raid_addr, num_blocks,
1743 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1744
1745 RF_LOCK_MUTEX(raidPtr->mutex);
1746 }
1747 RF_UNLOCK_MUTEX(raidPtr->mutex);
1748 }
1749
1750
1751
1752
1753 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1754
1755 int
1756 rf_DispatchKernelIO(queue, req)
1757 RF_DiskQueue_t *queue;
1758 RF_DiskQueueData_t *req;
1759 {
1760 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1761 struct buf *bp;
1762 struct raidbuf *raidbp = NULL;
1763 struct raid_softc *rs;
1764 int unit;
1765 int s;
1766
1767 s=0;
1768 /* s = splbio();*/ /* want to test this */
1769 /* XXX along with the vnode, we also need the softc associated with
1770 * this device.. */
1771
1772 req->queue = queue;
1773
1774 unit = queue->raidPtr->raidid;
1775
1776 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1777
1778 if (unit >= numraid) {
1779 printf("Invalid unit number: %d %d\n", unit, numraid);
1780 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1781 }
1782 rs = &raid_softc[unit];
1783
1784 bp = req->bp;
1785 #if 1
1786 /* XXX when there is a physical disk failure, someone is passing us a
1787 * buffer that contains old stuff!! Attempt to deal with this problem
1788 * without taking a performance hit... (not sure where the real bug
1789 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1790
1791 if (bp->b_flags & B_ERROR) {
1792 bp->b_flags &= ~B_ERROR;
1793 }
1794 if (bp->b_error != 0) {
1795 bp->b_error = 0;
1796 }
1797 #endif
1798 raidbp = RAIDGETBUF(rs);
1799
1800 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1801
1802 /*
1803 * context for raidiodone
1804 */
1805 raidbp->rf_obp = bp;
1806 raidbp->req = req;
1807
1808 LIST_INIT(&raidbp->rf_buf.b_dep);
1809
1810 switch (req->type) {
1811 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1812 /* XXX need to do something extra here.. */
1813 /* I'm leaving this in, as I've never actually seen it used,
1814 * and I'd like folks to report it... GO */
   1815 		printf("WAKEUP CALLED\n");
1816 queue->numOutstanding++;
1817
1818 /* XXX need to glue the original buffer into this?? */
1819
1820 KernelWakeupFunc(&raidbp->rf_buf);
1821 break;
1822
1823 case RF_IO_TYPE_READ:
1824 case RF_IO_TYPE_WRITE:
1825
1826 if (req->tracerec) {
1827 RF_ETIMER_START(req->tracerec->timer);
1828 }
1829 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1830 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1831 req->sectorOffset, req->numSector,
1832 req->buf, KernelWakeupFunc, (void *) req,
1833 queue->raidPtr->logBytesPerSector, req->b_proc);
1834
1835 if (rf_debugKernelAccess) {
1836 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1837 (long) bp->b_blkno));
1838 }
1839 queue->numOutstanding++;
1840 queue->last_deq_sector = req->sectorOffset;
1841 /* acc wouldn't have been let in if there were any pending
1842 * reqs at any other priority */
1843 queue->curPriority = req->priority;
1844
1845 db1_printf(("Going for %c to unit %d row %d col %d\n",
1846 req->type, unit, queue->row, queue->col));
1847 db1_printf(("sector %d count %d (%d bytes) %d\n",
1848 (int) req->sectorOffset, (int) req->numSector,
1849 (int) (req->numSector <<
1850 queue->raidPtr->logBytesPerSector),
1851 (int) queue->raidPtr->logBytesPerSector));
1852 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1853 raidbp->rf_buf.b_vp->v_numoutput++;
1854 }
1855 VOP_STRATEGY(&raidbp->rf_buf);
1856
1857 break;
1858
1859 default:
1860 panic("bad req->type in rf_DispatchKernelIO");
1861 }
1862 db1_printf(("Exiting from DispatchKernelIO\n"));
1863 /* splx(s); */ /* want to test this */
1864 return (0);
1865 }
   1866 /* this is the callback function associated with an I/O invoked from
1867 kernel code.
1868 */
1869 static void
1870 KernelWakeupFunc(vbp)
1871 struct buf *vbp;
1872 {
1873 RF_DiskQueueData_t *req = NULL;
1874 RF_DiskQueue_t *queue;
1875 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1876 struct buf *bp;
1877 struct raid_softc *rs;
1878 int unit;
1879 int s;
1880
1881 s = splbio();
1882 db1_printf(("recovering the request queue:\n"));
1883 req = raidbp->req;
1884
1885 bp = raidbp->rf_obp;
1886
1887 queue = (RF_DiskQueue_t *) req->queue;
1888
1889 if (raidbp->rf_buf.b_flags & B_ERROR) {
1890 bp->b_flags |= B_ERROR;
1891 bp->b_error = raidbp->rf_buf.b_error ?
1892 raidbp->rf_buf.b_error : EIO;
1893 }
1894
1895 /* XXX methinks this could be wrong... */
1896 #if 1
1897 bp->b_resid = raidbp->rf_buf.b_resid;
1898 #endif
1899
1900 if (req->tracerec) {
1901 RF_ETIMER_STOP(req->tracerec->timer);
1902 RF_ETIMER_EVAL(req->tracerec->timer);
1903 RF_LOCK_MUTEX(rf_tracing_mutex);
1904 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1905 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1906 req->tracerec->num_phys_ios++;
1907 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1908 }
1909 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1910
1911 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1912
1913
1914 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1915 * ballistic, and mark the component as hosed... */
1916
1917 if (bp->b_flags & B_ERROR) {
1918 /* Mark the disk as dead */
1919 /* but only mark it once... */
1920 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1921 rf_ds_optimal) {
1922 printf("raid%d: IO Error. Marking %s as failed.\n",
1923 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1924 queue->raidPtr->Disks[queue->row][queue->col].status =
1925 rf_ds_failed;
1926 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1927 queue->raidPtr->numFailures++;
1928 queue->raidPtr->numNewFailures++;
1929 } else { /* Disk is already dead... */
1930 /* printf("Disk already marked as dead!\n"); */
1931 }
1932
1933 }
1934
1935 rs = &raid_softc[unit];
1936 RAIDPUTBUF(rs, raidbp);
1937
1938 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1939 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1940
1941 splx(s);
1942 }
1943
1944
1945
1946 /*
1947 * initialize a buf structure for doing an I/O in the kernel.
1948 */
1949 static void
1950 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1951 logBytesPerSector, b_proc)
1952 struct buf *bp;
1953 struct vnode *b_vp;
1954 unsigned rw_flag;
1955 dev_t dev;
1956 RF_SectorNum_t startSect;
1957 RF_SectorCount_t numSect;
1958 caddr_t buf;
1959 void (*cbFunc) (struct buf *);
1960 void *cbArg;
1961 int logBytesPerSector;
1962 struct proc *b_proc;
1963 {
1964 /* bp->b_flags = B_PHYS | rw_flag; */
1965 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1966 bp->b_bcount = numSect << logBytesPerSector;
1967 bp->b_bufsize = bp->b_bcount;
1968 bp->b_error = 0;
1969 bp->b_dev = dev;
1970 bp->b_data = buf;
1971 bp->b_blkno = startSect;
1972 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1973 if (bp->b_bcount == 0) {
1974 panic("bp->b_bcount is zero in InitBP!!\n");
1975 }
1976 bp->b_proc = b_proc;
1977 bp->b_iodone = cbFunc;
1978 bp->b_vp = b_vp;
1979
1980 }
1981
1982 static void
1983 raidgetdefaultlabel(raidPtr, rs, lp)
1984 RF_Raid_t *raidPtr;
1985 struct raid_softc *rs;
1986 struct disklabel *lp;
1987 {
1988 db1_printf(("Building a default label...\n"));
1989 memset(lp, 0, sizeof(*lp));
1990
1991 /* fabricate a label... */
1992 lp->d_secperunit = raidPtr->totalSectors;
1993 lp->d_secsize = raidPtr->bytesPerSector;
1994 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1995 lp->d_ntracks = 4 * raidPtr->numCol;
1996 lp->d_ncylinders = raidPtr->totalSectors /
1997 (lp->d_nsectors * lp->d_ntracks);
1998 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1999
2000 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2001 lp->d_type = DTYPE_RAID;
2002 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2003 lp->d_rpm = 3600;
2004 lp->d_interleave = 1;
2005 lp->d_flags = 0;
2006
2007 lp->d_partitions[RAW_PART].p_offset = 0;
2008 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2009 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2010 lp->d_npartitions = RAW_PART + 1;
2011
2012 lp->d_magic = DISKMAGIC;
2013 lp->d_magic2 = DISKMAGIC;
2014 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2015
2016 }
2017 /*
2018 * Read the disklabel from the raid device. If one is not present, fake one
2019 * up.
2020 */
2021 static void
2022 raidgetdisklabel(dev)
2023 dev_t dev;
2024 {
2025 int unit = raidunit(dev);
2026 struct raid_softc *rs = &raid_softc[unit];
2027 char *errstring;
2028 struct disklabel *lp = rs->sc_dkdev.dk_label;
2029 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2030 RF_Raid_t *raidPtr;
2031
2032 db1_printf(("Getting the disklabel...\n"));
2033
2034 memset(clp, 0, sizeof(*clp));
2035
2036 raidPtr = raidPtrs[unit];
2037
2038 raidgetdefaultlabel(raidPtr, rs, lp);
2039
2040 /*
2041 * Call the generic disklabel extraction routine.
2042 */
2043 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2044 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2045 if (errstring)
2046 raidmakedisklabel(rs);
2047 else {
2048 int i;
2049 struct partition *pp;
2050
2051 /*
2052 * Sanity check whether the found disklabel is valid.
2053 *
2054 		 * This is necessary since the total size of the raid device
2055 		 * may vary when the interleave is changed even though exactly
2056 		 * the same components are used, and an old disklabel may be
2057 		 * used if one is found.
2058 */
2059 if (lp->d_secperunit != rs->sc_size)
2060 printf("raid%d: WARNING: %s: "
2061 "total sector size in disklabel (%d) != "
2062 "the size of raid (%ld)\n", unit, rs->sc_xname,
2063 lp->d_secperunit, (long) rs->sc_size);
2064 for (i = 0; i < lp->d_npartitions; i++) {
2065 pp = &lp->d_partitions[i];
2066 if (pp->p_offset + pp->p_size > rs->sc_size)
2067 printf("raid%d: WARNING: %s: end of partition `%c' "
2068 "exceeds the size of raid (%ld)\n",
2069 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2070 }
2071 }
2072
2073 }
2074 /*
2075 * Take care of things one might want to take care of in the event
2076 * that a disklabel isn't present.
2077 */
2078 static void
2079 raidmakedisklabel(rs)
2080 struct raid_softc *rs;
2081 {
2082 struct disklabel *lp = rs->sc_dkdev.dk_label;
2083 db1_printf(("Making a label..\n"));
2084
2085 /*
2086 * For historical reasons, if there's no disklabel present
2087 * the raw partition must be marked FS_BSDFFS.
2088 */
2089
2090 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2091
2092 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2093
2094 lp->d_checksum = dkcksum(lp);
2095 }
2096 /*
2097 * Lookup the provided name in the filesystem. If the file exists,
2098 * is a valid block device, and isn't being used by anyone else,
2099 * set *vpp to the file's vnode.
2100 * You'll find the original of this in ccd.c
2101 */
2102 int
2103 raidlookup(path, p, vpp)
2104 char *path;
2105 struct proc *p;
2106 struct vnode **vpp; /* result */
2107 {
2108 struct nameidata nd;
2109 struct vnode *vp;
2110 struct vattr va;
2111 int error;
2112
2113 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2114 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2115 #if 0
2116 printf("RAIDframe: vn_open returned %d\n", error);
2117 #endif
2118 return (error);
2119 }
2120 vp = nd.ni_vp;
2121 if (vp->v_usecount > 1) {
2122 VOP_UNLOCK(vp, 0);
2123 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2124 return (EBUSY);
2125 }
2126 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2127 VOP_UNLOCK(vp, 0);
2128 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2129 return (error);
2130 }
2131 /* XXX: eventually we should handle VREG, too. */
2132 if (va.va_type != VBLK) {
2133 VOP_UNLOCK(vp, 0);
2134 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2135 return (ENOTBLK);
2136 }
2137 VOP_UNLOCK(vp, 0);
2138 *vpp = vp;
2139 return (0);
2140 }
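/*
 * A minimal usage sketch (hypothetical caller; the real consumers of
 * raidlookup() live in the configuration code):
 *
 *	struct vnode *vp;
 *	int error;
 *
 *	error = raidlookup(component_path, p, &vp);
 *	if (error == 0) {
 *		... use vp; release it with vn_close() when finished ...
 *	}
 *
 * EBUSY comes back if the device is already in use, ENOTBLK if the
 * path does not name a block device.
 */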
2141 /*
2142 * Wait interruptibly for an exclusive lock.
2143 *
2144 * XXX
2145 * Several drivers do this; it should be abstracted and made MP-safe.
2146 * (Hmm... where have we seen this warning before :-> GO )
2147 */
2148 static int
2149 raidlock(rs)
2150 struct raid_softc *rs;
2151 {
2152 int error;
2153
2154 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2155 rs->sc_flags |= RAIDF_WANTED;
2156 if ((error =
2157 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2158 return (error);
2159 }
2160 rs->sc_flags |= RAIDF_LOCKED;
2161 return (0);
2162 }
2163 /*
2164 * Unlock and wake up any waiters.
2165 */
2166 static void
2167 raidunlock(rs)
2168 struct raid_softc *rs;
2169 {
2170
2171 rs->sc_flags &= ~RAIDF_LOCKED;
2172 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2173 rs->sc_flags &= ~RAIDF_WANTED;
2174 wakeup(rs);
2175 }
2176 }
2177
2178
2179 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2180 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2181
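/*
 * The component label lives RF_COMPONENT_INFO_OFFSET bytes into each
 * component (block 32 with the usual 512-byte DEV_BSIZE) and occupies
 * at most RF_COMPONENT_INFO_SIZE bytes on disk.
 *
 * raidmarkclean() and raidmarkdirty() below share the same
 * read-modify-write pattern: fetch the current label, set mod_counter
 * to the caller-supplied value, set the clean flag appropriately, and
 * write the label back out.
 */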
2182 int
2183 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2184 {
2185 RF_ComponentLabel_t clabel;
2186 raidread_component_label(dev, b_vp, &clabel);
2187 clabel.mod_counter = mod_counter;
2188 clabel.clean = RF_RAID_CLEAN;
2189 raidwrite_component_label(dev, b_vp, &clabel);
2190 return(0);
2191 }
2192
2193
2194 int
2195 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2196 {
2197 RF_ComponentLabel_t clabel;
2198 raidread_component_label(dev, b_vp, &clabel);
2199 clabel.mod_counter = mod_counter;
2200 clabel.clean = RF_RAID_DIRTY;
2201 raidwrite_component_label(dev, b_vp, &clabel);
2202 return(0);
2203 }
2204
2205 /* ARGSUSED */
2206 int
2207 raidread_component_label(dev, b_vp, clabel)
2208 dev_t dev;
2209 struct vnode *b_vp;
2210 RF_ComponentLabel_t *clabel;
2211 {
2212 struct buf *bp;
2213 int error;
2214
2215 /* XXX should probably ensure that we don't try to do this if
2216 someone has changed rf_protected_sectors. */
2217
2218 if (b_vp == NULL) {
2219 /* For whatever reason, this component is not valid.
2220 Don't try to read a component label from it. */
2221 return(EINVAL);
2222 }
2223
2224 /* get a block of the appropriate size... */
2225 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2226 bp->b_dev = dev;
2227
2228 /* get our ducks in a row for the read */
2229 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2230 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2231 bp->b_flags |= B_READ;
2232 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2233
2234 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2235
2236 error = biowait(bp);
2237
2238 if (!error) {
2239 memcpy(clabel, bp->b_data,
2240 sizeof(RF_ComponentLabel_t));
2241 #if 0
2242 rf_print_component_label( clabel );
2243 #endif
2244 } else {
2245 #if 0
2246 printf("Failed to read RAID component label!\n");
2247 #endif
2248 }
2249
2250 brelse(bp);
2251 return(error);
2252 }
2253 /* ARGSUSED */
2254 int
2255 raidwrite_component_label(dev, b_vp, clabel)
2256 dev_t dev;
2257 struct vnode *b_vp;
2258 RF_ComponentLabel_t *clabel;
2259 {
2260 struct buf *bp;
2261 int error;
2262
2263 /* get a block of the appropriate size... */
2264 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2265 bp->b_dev = dev;
2266
2267 /* get our ducks in a row for the write */
2268 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2269 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2270 bp->b_flags |= B_WRITE;
2271 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2272
2273 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2274
2275 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2276
2277 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2278 error = biowait(bp);
2279 brelse(bp);
2280 if (error) {
2281 #if 1
2282 printf("Failed to write RAID component info!\n");
2283 #endif
2284 }
2285
2286 return(error);
2287 }
2288
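/*
 * Walk every non-failed component and mark its label dirty, using the
 * freshly incremented mod_counter.  Components whose label says they
 * have been spared out are left untouched for now (see the XXX below).
 */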
2289 void
2290 rf_markalldirty(raidPtr)
2291 RF_Raid_t *raidPtr;
2292 {
2293 RF_ComponentLabel_t clabel;
2294 int r,c;
2295
2296 raidPtr->mod_counter++;
2297 for (r = 0; r < raidPtr->numRow; r++) {
2298 for (c = 0; c < raidPtr->numCol; c++) {
2299 /* we don't want to touch (at all) a disk that has
2300 failed */
2301 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2302 raidread_component_label(
2303 raidPtr->Disks[r][c].dev,
2304 raidPtr->raid_cinfo[r][c].ci_vp,
2305 &clabel);
2306 if (clabel.status == rf_ds_spared) {
2307 /* XXX do something special...
2308 but whatever you do, don't
2309 try to access it!! */
2310 } else {
2311 #if 0
2312 clabel.status =
2313 raidPtr->Disks[r][c].status;
2314 raidwrite_component_label(
2315 raidPtr->Disks[r][c].dev,
2316 raidPtr->raid_cinfo[r][c].ci_vp,
2317 &clabel);
2318 #endif
2319 raidmarkdirty(
2320 raidPtr->Disks[r][c].dev,
2321 raidPtr->raid_cinfo[r][c].ci_vp,
2322 raidPtr->mod_counter);
2323 }
2324 }
2325 }
2326 }
2327 /* printf("Component labels marked dirty.\n"); */
2328 #if 0
2329 for( c = 0; c < raidPtr->numSpare ; c++) {
2330 sparecol = raidPtr->numCol + c;
2331 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2332 /*
2333
2334 XXX this is where we get fancy and map this spare
2335 			into its correct spot in the array.
2336
2337 */
2338 /*
2339
2340 we claim this disk is "optimal" if it's
2341 rf_ds_used_spare, as that means it should be
2342 directly substitutable for the disk it replaced.
2343 We note that too...
2344
2345 */
2346
2347 for(i=0;i<raidPtr->numRow;i++) {
2348 for(j=0;j<raidPtr->numCol;j++) {
2349 if ((raidPtr->Disks[i][j].spareRow ==
2350 r) &&
2351 (raidPtr->Disks[i][j].spareCol ==
2352 sparecol)) {
2353 srow = r;
2354 scol = sparecol;
2355 break;
2356 }
2357 }
2358 }
2359
2360 raidread_component_label(
2361 raidPtr->Disks[r][sparecol].dev,
2362 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2363 &clabel);
2364 /* make sure status is noted */
2365 clabel.version = RF_COMPONENT_LABEL_VERSION;
2366 clabel.mod_counter = raidPtr->mod_counter;
2367 clabel.serial_number = raidPtr->serial_number;
2368 clabel.row = srow;
2369 clabel.column = scol;
2370 clabel.num_rows = raidPtr->numRow;
2371 clabel.num_columns = raidPtr->numCol;
2372 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2373 clabel.status = rf_ds_optimal;
2374 raidwrite_component_label(
2375 raidPtr->Disks[r][sparecol].dev,
2376 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2377 &clabel);
2378 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2379 				raidPtr->raid_cinfo[r][sparecol].ci_vp, raidPtr->mod_counter);
2380 }
2381 }
2382
2383 #endif
2384 }
2385
2386
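/*
 * Refresh the labels on all optimal components and on any used spares:
 * bump the mod_counter and rewrite each label; when this is the final
 * update (RF_FINAL_COMPONENT_UPDATE) and parity is known good, mark
 * the labels clean as well.
 */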
2387 void
2388 rf_update_component_labels(raidPtr, final)
2389 RF_Raid_t *raidPtr;
2390 int final;
2391 {
2392 RF_ComponentLabel_t clabel;
2393 int sparecol;
2394 int r,c;
2395 int i,j;
2396 int srow, scol;
2397
2398 srow = -1;
2399 scol = -1;
2400
2401 /* XXX should do extra checks to make sure things really are clean,
2402 rather than blindly setting the clean bit... */
2403
2404 raidPtr->mod_counter++;
2405
2406 for (r = 0; r < raidPtr->numRow; r++) {
2407 for (c = 0; c < raidPtr->numCol; c++) {
2408 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2409 raidread_component_label(
2410 raidPtr->Disks[r][c].dev,
2411 raidPtr->raid_cinfo[r][c].ci_vp,
2412 &clabel);
2413 /* make sure status is noted */
2414 clabel.status = rf_ds_optimal;
2415 /* bump the counter */
2416 clabel.mod_counter = raidPtr->mod_counter;
2417
2418 raidwrite_component_label(
2419 raidPtr->Disks[r][c].dev,
2420 raidPtr->raid_cinfo[r][c].ci_vp,
2421 &clabel);
2422 if (final == RF_FINAL_COMPONENT_UPDATE) {
2423 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2424 raidmarkclean(
2425 raidPtr->Disks[r][c].dev,
2426 raidPtr->raid_cinfo[r][c].ci_vp,
2427 raidPtr->mod_counter);
2428 }
2429 }
2430 }
2431 /* else we don't touch it.. */
2432 }
2433 }
2434
2435 for( c = 0; c < raidPtr->numSpare ; c++) {
2436 sparecol = raidPtr->numCol + c;
2437 /* Need to ensure that the reconstruct actually completed! */
2438 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2439 /*
2440
2441 we claim this disk is "optimal" if it's
2442 rf_ds_used_spare, as that means it should be
2443 directly substitutable for the disk it replaced.
2444 We note that too...
2445
2446 */
2447
2448 for(i=0;i<raidPtr->numRow;i++) {
2449 for(j=0;j<raidPtr->numCol;j++) {
2450 if ((raidPtr->Disks[i][j].spareRow ==
2451 0) &&
2452 (raidPtr->Disks[i][j].spareCol ==
2453 sparecol)) {
2454 srow = i;
2455 scol = j;
2456 break;
2457 }
2458 }
2459 }
2460
2461 /* XXX shouldn't *really* need this... */
2462 raidread_component_label(
2463 raidPtr->Disks[0][sparecol].dev,
2464 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2465 &clabel);
2466 /* make sure status is noted */
2467
2468 raid_init_component_label(raidPtr, &clabel);
2469
2470 clabel.mod_counter = raidPtr->mod_counter;
2471 clabel.row = srow;
2472 clabel.column = scol;
2473 clabel.status = rf_ds_optimal;
2474
2475 raidwrite_component_label(
2476 raidPtr->Disks[0][sparecol].dev,
2477 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2478 &clabel);
2479 if (final == RF_FINAL_COMPONENT_UPDATE) {
2480 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2481 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2482 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2483 raidPtr->mod_counter);
2484 }
2485 }
2486 }
2487 }
2488 /* printf("Component labels updated\n"); */
2489 }
2490
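/*
 * Release a component's vnode.  Auto-configured components were opened
 * directly with VOP_OPEN() and so are closed with VOP_CLOSE()/vput();
 * anything else (typically obtained via raidlookup()) is released with
 * vn_close().
 */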
2491 void
2492 rf_close_component(raidPtr, vp, auto_configured)
2493 RF_Raid_t *raidPtr;
2494 struct vnode *vp;
2495 int auto_configured;
2496 {
2497 struct proc *p;
2498
2499 p = raidPtr->engine_thread;
2500
2501 if (vp != NULL) {
2502 if (auto_configured == 1) {
2503 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2504 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2505 vput(vp);
2506
2507 } else {
2508 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2509 }
2510 } else {
2511 #if 0
2512 printf("vnode was NULL\n");
2513 #endif
2514 }
2515 }
2516
2517
2518 void
2519 rf_UnconfigureVnodes(raidPtr)
2520 RF_Raid_t *raidPtr;
2521 {
2522 int r,c;
2523 struct proc *p;
2524 struct vnode *vp;
2525 int acd;
2526
2527
2528 /* We take this opportunity to close the vnodes like we should.. */
2529
2530 p = raidPtr->engine_thread;
2531
2532 for (r = 0; r < raidPtr->numRow; r++) {
2533 for (c = 0; c < raidPtr->numCol; c++) {
2534 #if 0
2535 printf("raid%d: Closing vnode for row: %d col: %d\n",
2536 raidPtr->raidid, r, c);
2537 #endif
2538 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2539 acd = raidPtr->Disks[r][c].auto_configured;
2540 rf_close_component(raidPtr, vp, acd);
2541 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2542 raidPtr->Disks[r][c].auto_configured = 0;
2543 }
2544 }
2545 for (r = 0; r < raidPtr->numSpare; r++) {
2546 #if 0
2547 printf("raid%d: Closing vnode for spare: %d\n",
2548 raidPtr->raidid, r);
2549 #endif
2550 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2551 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2552 rf_close_component(raidPtr, vp, acd);
2553 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2554 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2555 }
2556 }
2557
2558
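/*
 * The next four functions are kernel-thread bodies spun off for
 * long-running operations: reconstruction to a spare, parity rewrite,
 * copyback, and in-place reconstruction.  Each does its work at
 * splbio(), clears the corresponding *_in_progress flag, and exits
 * via kthread_exit().
 */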
2559 void
2560 rf_ReconThread(req)
2561 struct rf_recon_req *req;
2562 {
2563 int s;
2564 RF_Raid_t *raidPtr;
2565
2566 s = splbio();
2567 raidPtr = (RF_Raid_t *) req->raidPtr;
2568 raidPtr->recon_in_progress = 1;
2569
2570 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2571 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2572
2573 /* XXX get rid of this! we don't need it at all.. */
2574 RF_Free(req, sizeof(*req));
2575
2576 raidPtr->recon_in_progress = 0;
2577 splx(s);
2578
2579 /* That's all... */
2580 kthread_exit(0); /* does not return */
2581 }
2582
2583 void
2584 rf_RewriteParityThread(raidPtr)
2585 RF_Raid_t *raidPtr;
2586 {
2587 int retcode;
2588 int s;
2589
2590 raidPtr->parity_rewrite_in_progress = 1;
2591 s = splbio();
2592 retcode = rf_RewriteParity(raidPtr);
2593 splx(s);
2594 if (retcode) {
2595 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2596 } else {
2597 /* set the clean bit! If we shutdown correctly,
2598 the clean bit on each component label will get
2599 set */
2600 raidPtr->parity_good = RF_RAID_CLEAN;
2601 }
2602 raidPtr->parity_rewrite_in_progress = 0;
2603
2604 /* Anyone waiting for us to stop? If so, inform them... */
2605 if (raidPtr->waitShutdown) {
2606 wakeup(&raidPtr->parity_rewrite_in_progress);
2607 }
2608
2609 /* That's all... */
2610 kthread_exit(0); /* does not return */
2611 }
2612
2613
2614 void
2615 rf_CopybackThread(raidPtr)
2616 RF_Raid_t *raidPtr;
2617 {
2618 int s;
2619
2620 raidPtr->copyback_in_progress = 1;
2621 s = splbio();
2622 rf_CopybackReconstructedData(raidPtr);
2623 splx(s);
2624 raidPtr->copyback_in_progress = 0;
2625
2626 /* That's all... */
2627 kthread_exit(0); /* does not return */
2628 }
2629
2630
2631 void
2632 rf_ReconstructInPlaceThread(req)
2633 struct rf_recon_req *req;
2634 {
2635 int retcode;
2636 int s;
2637 RF_Raid_t *raidPtr;
2638
2639 s = splbio();
2640 raidPtr = req->raidPtr;
2641 raidPtr->recon_in_progress = 1;
2642 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2643 RF_Free(req, sizeof(*req));
2644 raidPtr->recon_in_progress = 0;
2645 splx(s);
2646
2647 /* That's all... */
2648 kthread_exit(0); /* does not return */
2649 }
2650
2651 void
2652 rf_mountroot_hook(dev)
2653 struct device *dev;
2654 {
2655
2656 }
2657
2658
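/*
 * Scan every disk device in the system for partitions of type FS_RAID,
 * read the component label from each candidate, and build a list of
 * RF_AutoConfig_t entries for those whose labels look reasonable.
 * Floppy and CD drivers are skipped outright.
 */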
2659 RF_AutoConfig_t *
2660 rf_find_raid_components()
2661 {
2662 struct devnametobdevmaj *dtobdm;
2663 struct vnode *vp;
2664 struct disklabel label;
2665 struct device *dv;
2666 char *cd_name;
2667 dev_t dev;
2668 int error;
2669 int i;
2670 int good_one;
2671 RF_ComponentLabel_t *clabel;
2672 RF_AutoConfig_t *ac_list;
2673 RF_AutoConfig_t *ac;
2674
2675
2676 /* initialize the AutoConfig list */
2677 ac_list = NULL;
2678
2679 /* we begin by trolling through *all* the devices on the system */
2680
2681 for (dv = alldevs.tqh_first; dv != NULL;
2682 dv = dv->dv_list.tqe_next) {
2683
2684 /* we are only interested in disks... */
2685 if (dv->dv_class != DV_DISK)
2686 continue;
2687
2688 /* we don't care about floppies... */
2689 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2690 continue;
2691 }
2692
2693 /* we don't care about CD's... */
2694 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"cd")) {
2695 continue;
2696 }
2697
2698 /* hdfd is the Atari/Hades floppy driver */
2699 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
2700 continue;
2701 }
2702 /* fdisa is the Atari/Milan floppy driver */
2703 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fdisa")) {
2704 continue;
2705 }
2706
2707 /* need to find the device_name_to_block_device_major stuff */
2708 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2709 dtobdm = dev_name2blk;
2710 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2711 dtobdm++;
2712 }
2713
2714 /* get a vnode for the raw partition of this disk */
2715
2716 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2717 if (bdevvp(dev, &vp))
2718 panic("RAID can't alloc vnode");
2719
2720 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2721
2722 if (error) {
2723 /* "Who cares." Continue looking
2724 			   for something that exists */
2725 vput(vp);
2726 continue;
2727 }
2728
2729 /* Ok, the disk exists. Go get the disklabel. */
2730 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2731 FREAD, NOCRED, 0);
2732 if (error) {
2733 /*
2734 * XXX can't happen - open() would
2735 * have errored out (or faked up one)
2736 */
2737 printf("can't get label for dev %s%c (%d)!?!?\n",
2738 dv->dv_xname, 'a' + RAW_PART, error);
2739 }
2740
2741 /* don't need this any more. We'll allocate it again
2742 a little later if we really do... */
2743 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2744 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2745 vput(vp);
2746
2747 for (i=0; i < label.d_npartitions; i++) {
2748 /* We only support partitions marked as RAID */
2749 if (label.d_partitions[i].p_fstype != FS_RAID)
2750 continue;
2751
2752 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2753 if (bdevvp(dev, &vp))
2754 panic("RAID can't alloc vnode");
2755
2756 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2757 if (error) {
2758 /* Whatever... */
2759 vput(vp);
2760 continue;
2761 }
2762
2763 good_one = 0;
2764
2765 clabel = (RF_ComponentLabel_t *)
2766 malloc(sizeof(RF_ComponentLabel_t),
2767 M_RAIDFRAME, M_NOWAIT);
2768 if (clabel == NULL) {
2769 /* XXX CLEANUP HERE */
2770 printf("RAID auto config: out of memory!\n");
2771 return(NULL); /* XXX probably should panic? */
2772 }
2773
2774 if (!raidread_component_label(dev, vp, clabel)) {
2775 /* Got the label. Does it look reasonable? */
2776 if (rf_reasonable_label(clabel) &&
2777 (clabel->partitionSize <=
2778 label.d_partitions[i].p_size)) {
2779 #if DEBUG
2780 printf("Component on: %s%c: %d\n",
2781 dv->dv_xname, 'a'+i,
2782 label.d_partitions[i].p_size);
2783 rf_print_component_label(clabel);
2784 #endif
2785 /* if it's reasonable, add it,
2786 else ignore it. */
2787 ac = (RF_AutoConfig_t *)
2788 malloc(sizeof(RF_AutoConfig_t),
2789 M_RAIDFRAME,
2790 M_NOWAIT);
2791 if (ac == NULL) {
2792 /* XXX should panic?? */
2793 return(NULL);
2794 }
2795
2796 sprintf(ac->devname, "%s%c",
2797 dv->dv_xname, 'a'+i);
2798 ac->dev = dev;
2799 ac->vp = vp;
2800 ac->clabel = clabel;
2801 ac->next = ac_list;
2802 ac_list = ac;
2803 good_one = 1;
2804 }
2805 }
2806 if (!good_one) {
2807 /* cleanup */
2808 free(clabel, M_RAIDFRAME);
2809 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2810 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2811 vput(vp);
2812 }
2813 }
2814 }
2815 return(ac_list);
2816 }
2817
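/*
 * Basic sanity checks on a component label: a known version, a valid
 * clean flag, and row/column/size values that are internally
 * consistent.
 */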
2818 static int
2819 rf_reasonable_label(clabel)
2820 RF_ComponentLabel_t *clabel;
2821 {
2822
2823 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2824 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2825 ((clabel->clean == RF_RAID_CLEAN) ||
2826 (clabel->clean == RF_RAID_DIRTY)) &&
2827 clabel->row >=0 &&
2828 clabel->column >= 0 &&
2829 clabel->num_rows > 0 &&
2830 clabel->num_columns > 0 &&
2831 clabel->row < clabel->num_rows &&
2832 clabel->column < clabel->num_columns &&
2833 clabel->blockSize > 0 &&
2834 clabel->numBlocks > 0) {
2835 /* label looks reasonable enough... */
2836 return(1);
2837 }
2838 return(0);
2839 }
2840
2841
2842 void
2843 rf_print_component_label(clabel)
2844 RF_ComponentLabel_t *clabel;
2845 {
2846 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2847 clabel->row, clabel->column,
2848 clabel->num_rows, clabel->num_columns);
2849 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2850 clabel->version, clabel->serial_number,
2851 clabel->mod_counter);
2852 printf(" Clean: %s Status: %d\n",
2853 clabel->clean ? "Yes" : "No", clabel->status );
2854 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2855 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2856 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2857 (char) clabel->parityConfig, clabel->blockSize,
2858 clabel->numBlocks);
2859 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2860 printf(" Contains root partition: %s\n",
2861 clabel->root_partition ? "Yes" : "No" );
2862 printf(" Last configured as: raid%d\n", clabel->last_unit );
2863 #if 0
2864 printf(" Config order: %d\n", clabel->config_order);
2865 #endif
2866
2867 }
2868
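/*
 * Partition the auto-config list into sets of components that appear
 * to belong to the same array, using rf_does_it_fit() to compare
 * component labels.
 */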
2869 RF_ConfigSet_t *
2870 rf_create_auto_sets(ac_list)
2871 RF_AutoConfig_t *ac_list;
2872 {
2873 RF_AutoConfig_t *ac;
2874 RF_ConfigSet_t *config_sets;
2875 RF_ConfigSet_t *cset;
2876 RF_AutoConfig_t *ac_next;
2877
2878
2879 config_sets = NULL;
2880
2881 /* Go through the AutoConfig list, and figure out which components
2882 belong to what sets. */
2883 ac = ac_list;
2884 while(ac!=NULL) {
2885 /* we're going to putz with ac->next, so save it here
2886 for use at the end of the loop */
2887 ac_next = ac->next;
2888
2889 if (config_sets == NULL) {
2890 /* will need at least this one... */
2891 config_sets = (RF_ConfigSet_t *)
2892 malloc(sizeof(RF_ConfigSet_t),
2893 M_RAIDFRAME, M_NOWAIT);
2894 if (config_sets == NULL) {
2895 panic("rf_create_auto_sets: No memory!\n");
2896 }
2897 /* this one is easy :) */
2898 config_sets->ac = ac;
2899 config_sets->next = NULL;
2900 config_sets->rootable = 0;
2901 ac->next = NULL;
2902 } else {
2903 /* which set does this component fit into? */
2904 cset = config_sets;
2905 while(cset!=NULL) {
2906 if (rf_does_it_fit(cset, ac)) {
2907 /* looks like it matches... */
2908 ac->next = cset->ac;
2909 cset->ac = ac;
2910 break;
2911 }
2912 cset = cset->next;
2913 }
2914 if (cset==NULL) {
2915 /* didn't find a match above... new set..*/
2916 cset = (RF_ConfigSet_t *)
2917 malloc(sizeof(RF_ConfigSet_t),
2918 M_RAIDFRAME, M_NOWAIT);
2919 if (cset == NULL) {
2920 panic("rf_create_auto_sets: No memory!\n");
2921 }
2922 cset->ac = ac;
2923 ac->next = NULL;
2924 cset->next = config_sets;
2925 cset->rootable = 0;
2926 config_sets = cset;
2927 }
2928 }
2929 ac = ac_next;
2930 }
2931
2932
2933 return(config_sets);
2934 }
2935
2936 static int
2937 rf_does_it_fit(cset, ac)
2938 RF_ConfigSet_t *cset;
2939 RF_AutoConfig_t *ac;
2940 {
2941 RF_ComponentLabel_t *clabel1, *clabel2;
2942
2943 /* If this one matches the *first* one in the set, that's good
2944 enough, since the other members of the set would have been
2945 through here too... */
2946 /* note that we are not checking partitionSize here..
2947
2948 Note that we are also not checking the mod_counters here.
2949 	   If everything else matches except the mod_counter, that's
2950 good enough for this test. We will deal with the mod_counters
2951 a little later in the autoconfiguration process.
2952
2953 (clabel1->mod_counter == clabel2->mod_counter) &&
2954
2955 The reason we don't check for this is that failed disks
2956 will have lower modification counts. If those disks are
2957 not added to the set they used to belong to, then they will
2958 form their own set, which may result in 2 different sets,
2959 for example, competing to be configured at raid0, and
2960 perhaps competing to be the root filesystem set. If the
2961 wrong ones get configured, or both attempt to become /,
2962 	   weird behaviour and/or serious lossage will occur.  Thus we
2963 need to bring them into the fold here, and kick them out at
2964 a later point.
2965
2966 */
2967
2968 clabel1 = cset->ac->clabel;
2969 clabel2 = ac->clabel;
2970 if ((clabel1->version == clabel2->version) &&
2971 (clabel1->serial_number == clabel2->serial_number) &&
2972 (clabel1->num_rows == clabel2->num_rows) &&
2973 (clabel1->num_columns == clabel2->num_columns) &&
2974 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2975 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2976 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2977 (clabel1->parityConfig == clabel2->parityConfig) &&
2978 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2979 (clabel1->blockSize == clabel2->blockSize) &&
2980 (clabel1->numBlocks == clabel2->numBlocks) &&
2981 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2982 (clabel1->root_partition == clabel2->root_partition) &&
2983 (clabel1->last_unit == clabel2->last_unit) &&
2984 (clabel1->config_order == clabel2->config_order)) {
2985 		/* if it gets here, it almost *has* to be a match */
2986 } else {
2987 /* it's not consistent with somebody in the set..
2988 punt */
2989 return(0);
2990 }
2991 /* all was fine.. it must fit... */
2992 return(1);
2993 }
2994
2995 int
2996 rf_have_enough_components(cset)
2997 RF_ConfigSet_t *cset;
2998 {
2999 RF_AutoConfig_t *ac;
3000 RF_AutoConfig_t *auto_config;
3001 RF_ComponentLabel_t *clabel;
3002 int r,c;
3003 int num_rows;
3004 int num_cols;
3005 int num_missing;
3006 int mod_counter;
3007 int mod_counter_found;
3008 int even_pair_failed;
3009 char parity_type;
3010
3011
3012 /* check to see that we have enough 'live' components
3013 of this set. If so, we can configure it if necessary */
3014
3015 num_rows = cset->ac->clabel->num_rows;
3016 num_cols = cset->ac->clabel->num_columns;
3017 parity_type = cset->ac->clabel->parityConfig;
3018
3019 /* XXX Check for duplicate components!?!?!? */
3020
3021 /* Determine what the mod_counter is supposed to be for this set. */
3022
3023 mod_counter_found = 0;
3024 mod_counter = 0;
3025 ac = cset->ac;
3026 while(ac!=NULL) {
3027 if (mod_counter_found==0) {
3028 mod_counter = ac->clabel->mod_counter;
3029 mod_counter_found = 1;
3030 } else {
3031 if (ac->clabel->mod_counter > mod_counter) {
3032 mod_counter = ac->clabel->mod_counter;
3033 }
3034 }
3035 ac = ac->next;
3036 }
3037
3038 num_missing = 0;
3039 auto_config = cset->ac;
3040
3041 for(r=0; r<num_rows; r++) {
3042 even_pair_failed = 0;
3043 for(c=0; c<num_cols; c++) {
3044 ac = auto_config;
3045 while(ac!=NULL) {
3046 if ((ac->clabel->row == r) &&
3047 (ac->clabel->column == c) &&
3048 (ac->clabel->mod_counter == mod_counter)) {
3049 /* it's this one... */
3050 #if DEBUG
3051 printf("Found: %s at %d,%d\n",
3052 ac->devname,r,c);
3053 #endif
3054 break;
3055 }
3056 ac=ac->next;
3057 }
3058 if (ac==NULL) {
3059 /* Didn't find one here! */
3060 /* special case for RAID 1, especially
3061 where there are more than 2
3062 components (where RAIDframe treats
3063 things a little differently :( ) */
3064 if (parity_type == '1') {
3065 if (c%2 == 0) { /* even component */
3066 even_pair_failed = 1;
3067 } else { /* odd component. If
3068 we're failed, and
3069 so is the even
3070 component, it's
3071 "Good Night, Charlie" */
3072 if (even_pair_failed == 1) {
3073 return(0);
3074 }
3075 }
3076 } else {
3077 /* normal accounting */
3078 num_missing++;
3079 }
3080 }
3081 if ((parity_type == '1') && (c%2 == 1)) {
3082 				/* Just did the second (odd) component of a
3083 				   pair, and we didn't bail.. reset the
3084 				   even_pair_failed flag, and go on to the
				   next pair.... */
3085 even_pair_failed = 0;
3086 }
3087 }
3088 }
3089
3090 clabel = cset->ac->clabel;
3091
3092 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3093 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3094 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3095 /* XXX this needs to be made *much* more general */
3096 /* Too many failures */
3097 return(0);
3098 }
3099 /* otherwise, all is well, and we've got enough to take a kick
3100 at autoconfiguring this set */
3101 return(1);
3102 }
3103
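/*
 * Build an RF_Config_t from an auto-config set: geometry and layout
 * parameters come straight from the first component label, and each
 * component's device name is placed in its row/column slot.
 */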
3104 void
3105 rf_create_configuration(ac,config,raidPtr)
3106 RF_AutoConfig_t *ac;
3107 RF_Config_t *config;
3108 RF_Raid_t *raidPtr;
3109 {
3110 RF_ComponentLabel_t *clabel;
3111 int i;
3112
3113 clabel = ac->clabel;
3114
3115 /* 1. Fill in the common stuff */
3116 config->numRow = clabel->num_rows;
3117 config->numCol = clabel->num_columns;
3118 config->numSpare = 0; /* XXX should this be set here? */
3119 config->sectPerSU = clabel->sectPerSU;
3120 config->SUsPerPU = clabel->SUsPerPU;
3121 config->SUsPerRU = clabel->SUsPerRU;
3122 config->parityConfig = clabel->parityConfig;
3123 /* XXX... */
3124 strcpy(config->diskQueueType,"fifo");
3125 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3126 config->layoutSpecificSize = 0; /* XXX ?? */
3127
3128 while(ac!=NULL) {
3129 /* row/col values will be in range due to the checks
3130 		   in rf_reasonable_label() */
3131 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3132 ac->devname);
3133 ac = ac->next;
3134 }
3135
3136 for(i=0;i<RF_MAXDBGV;i++) {
3137 		config->debugVars[i][0] = '\0';
3138 }
3139 }
3140
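/*
 * rf_set_autoconfig() and rf_set_rootpartition() both record a new flag
 * value in the in-core RF_Raid_t and then rewrite the component label
 * of every optimal component so that the setting persists.
 */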
3141 int
3142 rf_set_autoconfig(raidPtr, new_value)
3143 RF_Raid_t *raidPtr;
3144 int new_value;
3145 {
3146 RF_ComponentLabel_t clabel;
3147 struct vnode *vp;
3148 dev_t dev;
3149 int row, column;
3150
3151 raidPtr->autoconfigure = new_value;
3152 for(row=0; row<raidPtr->numRow; row++) {
3153 for(column=0; column<raidPtr->numCol; column++) {
3154 if (raidPtr->Disks[row][column].status ==
3155 rf_ds_optimal) {
3156 dev = raidPtr->Disks[row][column].dev;
3157 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3158 raidread_component_label(dev, vp, &clabel);
3159 clabel.autoconfigure = new_value;
3160 raidwrite_component_label(dev, vp, &clabel);
3161 }
3162 }
3163 }
3164 return(new_value);
3165 }
3166
3167 int
3168 rf_set_rootpartition(raidPtr, new_value)
3169 RF_Raid_t *raidPtr;
3170 int new_value;
3171 {
3172 RF_ComponentLabel_t clabel;
3173 struct vnode *vp;
3174 dev_t dev;
3175 int row, column;
3176
3177 raidPtr->root_partition = new_value;
3178 for(row=0; row<raidPtr->numRow; row++) {
3179 for(column=0; column<raidPtr->numCol; column++) {
3180 if (raidPtr->Disks[row][column].status ==
3181 rf_ds_optimal) {
3182 dev = raidPtr->Disks[row][column].dev;
3183 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3184 raidread_component_label(dev, vp, &clabel);
3185 clabel.root_partition = new_value;
3186 raidwrite_component_label(dev, vp, &clabel);
3187 }
3188 }
3189 }
3190 return(new_value);
3191 }
3192
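/*
 * rf_release_all_vps() closes and releases the vnodes held by an
 * auto-config set; rf_cleanup_config_set() then frees the component
 * labels, the RF_AutoConfig_t entries, and the set itself.
 */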
3193 void
3194 rf_release_all_vps(cset)
3195 RF_ConfigSet_t *cset;
3196 {
3197 RF_AutoConfig_t *ac;
3198
3199 ac = cset->ac;
3200 while(ac!=NULL) {
3201 /* Close the vp, and give it back */
3202 if (ac->vp) {
3203 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3204 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3205 vput(ac->vp);
3206 ac->vp = NULL;
3207 }
3208 ac = ac->next;
3209 }
3210 }
3211
3212
3213 void
3214 rf_cleanup_config_set(cset)
3215 RF_ConfigSet_t *cset;
3216 {
3217 RF_AutoConfig_t *ac;
3218 RF_AutoConfig_t *next_ac;
3219
3220 ac = cset->ac;
3221 while(ac!=NULL) {
3222 next_ac = ac->next;
3223 /* nuke the label */
3224 free(ac->clabel, M_RAIDFRAME);
3225 /* cleanup the config structure */
3226 free(ac, M_RAIDFRAME);
3227 /* "next.." */
3228 ac = next_ac;
3229 }
3230 /* and, finally, nuke the config set */
3231 free(cset, M_RAIDFRAME);
3232 }
3233
3234
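/*
 * Fill in a component label from the current in-core array state.  The
 * label starts out RF_RAID_DIRTY; the clean bit is only set later, via
 * raidmarkclean(), once parity is known to be good.
 */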
3235 void
3236 raid_init_component_label(raidPtr, clabel)
3237 RF_Raid_t *raidPtr;
3238 RF_ComponentLabel_t *clabel;
3239 {
3240 /* current version number */
3241 clabel->version = RF_COMPONENT_LABEL_VERSION;
3242 clabel->serial_number = raidPtr->serial_number;
3243 clabel->mod_counter = raidPtr->mod_counter;
3244 clabel->num_rows = raidPtr->numRow;
3245 clabel->num_columns = raidPtr->numCol;
3246 clabel->clean = RF_RAID_DIRTY; /* not clean */
3247 clabel->status = rf_ds_optimal; /* "It's good!" */
3248
3249 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3250 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3251 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3252
3253 clabel->blockSize = raidPtr->bytesPerSector;
3254 clabel->numBlocks = raidPtr->sectorsPerDisk;
3255
3256 /* XXX not portable */
3257 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3258 clabel->maxOutstanding = raidPtr->maxOutstanding;
3259 clabel->autoconfigure = raidPtr->autoconfigure;
3260 clabel->root_partition = raidPtr->root_partition;
3261 clabel->last_unit = raidPtr->raidid;
3262 clabel->config_order = raidPtr->config_order;
3263 }
3264
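/*
 * Configure one auto-detected set: build an RF_Config_t, choose a raid
 * unit (preferring the last_unit recorded in the labels, otherwise the
 * highest free unit), run rf_Configure(), and note whether the
 * resulting array is eligible to be the root device.
 */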
3265 int
3266 rf_auto_config_set(cset,unit)
3267 RF_ConfigSet_t *cset;
3268 int *unit;
3269 {
3270 RF_Raid_t *raidPtr;
3271 RF_Config_t *config;
3272 int raidID;
3273 int retcode;
3274
3275 #if DEBUG
3276 printf("RAID autoconfigure\n");
3277 #endif
3278
3279 retcode = 0;
3280 *unit = -1;
3281
3282 /* 1. Create a config structure */
3283
3284 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3285 M_RAIDFRAME,
3286 M_NOWAIT);
3287 if (config==NULL) {
3288 printf("Out of mem!?!?\n");
3289 /* XXX do something more intelligent here. */
3290 return(1);
3291 }
3292
3293 memset(config, 0, sizeof(RF_Config_t));
3294
3295 /* XXX raidID needs to be set correctly.. */
3296
3297 /*
3298 2. Figure out what RAID ID this one is supposed to live at
3299 See if we can get the same RAID dev that it was configured
3300 on last time..
3301 */
3302
3303 raidID = cset->ac->clabel->last_unit;
3304 if ((raidID < 0) || (raidID >= numraid)) {
3305 /* let's not wander off into lala land. */
3306 raidID = numraid - 1;
3307 }
3308 if (raidPtrs[raidID]->valid != 0) {
3309
3310 /*
3311 Nope... Go looking for an alternative...
3312 Start high so we don't immediately use raid0 if that's
3313 not taken.
3314 */
3315
3316 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3317 if (raidPtrs[raidID]->valid == 0) {
3318 /* can use this one! */
3319 break;
3320 }
3321 }
3322 }
3323
3324 if (raidID < 0) {
3325 /* punt... */
3326 printf("Unable to auto configure this set!\n");
3327 printf("(Out of RAID devs!)\n");
3328 return(1);
3329 }
3330
3331 #if DEBUG
3332 printf("Configuring raid%d:\n",raidID);
3333 #endif
3334
3335 raidPtr = raidPtrs[raidID];
3336
3337 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3338 raidPtr->raidid = raidID;
3339 raidPtr->openings = RAIDOUTSTANDING;
3340
3341 /* 3. Build the configuration structure */
3342 rf_create_configuration(cset->ac, config, raidPtr);
3343
3344 /* 4. Do the configuration */
3345 retcode = rf_Configure(raidPtr, config, cset->ac);
3346
3347 if (retcode == 0) {
3348
3349 raidinit(raidPtrs[raidID]);
3350
3351 rf_markalldirty(raidPtrs[raidID]);
3352 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3353 if (cset->ac->clabel->root_partition==1) {
3354 /* everything configured just fine. Make a note
3355 that this set is eligible to be root. */
3356 cset->rootable = 1;
3357 /* XXX do this here? */
3358 raidPtrs[raidID]->root_partition = 1;
3359 }
3360 }
3361
3362 /* 5. Cleanup */
3363 free(config, M_RAIDFRAME);
3364
3365 *unit = raidID;
3366 return(retcode);
3367 }
3368
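/*
 * Account a completed access against this unit's disk statistics; the
 * byte count is the original request size less whatever remains in
 * b_resid.
 */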
3369 void
3370 rf_disk_unbusy(desc)
3371 RF_RaidAccessDesc_t *desc;
3372 {
3373 struct buf *bp;
3374
3375 bp = (struct buf *)desc->bp;
3376 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3377 (bp->b_bcount - bp->b_resid));
3378 }
3379