rf_netbsdkintf.c revision 1.117.6.4 1 /* $NetBSD: rf_netbsdkintf.c,v 1.117.6.4 2002/08/29 05:22:50 gehenna Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/cdefs.h>
117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.117.6.4 2002/08/29 05:22:50 gehenna Exp $");
118
119 #include <sys/param.h>
120 #include <sys/errno.h>
121 #include <sys/pool.h>
122 #include <sys/queue.h>
123 #include <sys/disk.h>
124 #include <sys/device.h>
125 #include <sys/stat.h>
126 #include <sys/ioctl.h>
127 #include <sys/fcntl.h>
128 #include <sys/systm.h>
129 #include <sys/namei.h>
130 #include <sys/vnode.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include <dev/raidframe/raidframevar.h>
139 #include <dev/raidframe/raidframeio.h>
140 #include "raid.h"
141 #include "opt_raid_autoconfig.h"
142 #include "rf_raid.h"
143 #include "rf_copyback.h"
144 #include "rf_dag.h"
145 #include "rf_dagflags.h"
146 #include "rf_desc.h"
147 #include "rf_diskqueue.h"
148 #include "rf_etimer.h"
149 #include "rf_general.h"
150 #include "rf_kintf.h"
151 #include "rf_options.h"
152 #include "rf_driver.h"
153 #include "rf_parityscan.h"
154 #include "rf_threadstuff.h"
155
156 int rf_kdebug_level = 0;
157
158 #ifdef DEBUG
159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
160 #else /* DEBUG */
161 #define db1_printf(a) { }
162 #endif /* DEBUG */
163
164 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
165
166 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
167
168 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
169 * spare table */
170 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
171 * installation process */
172
173 /* prototypes */
174 static void KernelWakeupFunc(struct buf * bp);
175 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
176 dev_t dev, RF_SectorNum_t startSect,
177 RF_SectorCount_t numSect, caddr_t buf,
178 void (*cbFunc) (struct buf *), void *cbArg,
179 int logBytesPerSector, struct proc * b_proc);
180 static void raidinit(RF_Raid_t *);
181
182 void raidattach(int);
183
184 dev_type_open(raidopen);
185 dev_type_close(raidclose);
186 dev_type_read(raidread);
187 dev_type_write(raidwrite);
188 dev_type_ioctl(raidioctl);
189 dev_type_strategy(raidstrategy);
190 dev_type_dump(raiddump);
191 dev_type_size(raidsize);
192
193 const struct bdevsw raid_bdevsw = {
194 raidopen, raidclose, raidstrategy, raidioctl,
195 raiddump, raidsize, D_DISK
196 };
197
198 const struct cdevsw raid_cdevsw = {
199 raidopen, raidclose, raidread, raidwrite, raidioctl,
200 nostop, notty, nopoll, nommap, D_DISK
201 };
202
203 /*
204 * Pilfered from ccd.c
205 */
206
207 struct raidbuf {
208 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
209 struct buf *rf_obp; /* ptr. to original I/O buf */
210 int rf_flags; /* misc. flags */
211 RF_DiskQueueData_t *req;/* the request that this was part of.. */
212 };
213
214 /* component buffer pool */
215 struct pool raidframe_cbufpool;
216
217 #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
218 #define RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
219
220 /* XXX Not sure if the following should be replacing the raidPtrs above,
221 or if it should be used in conjunction with that...
222 */
223
224 struct raid_softc {
225 int sc_flags; /* flags */
226 int sc_cflags; /* configuration flags */
227 size_t sc_size; /* size of the raid device */
228 char sc_xname[20]; /* XXX external name */
229 struct disk sc_dkdev; /* generic disk device info */
230 struct bufq_state buf_queue; /* used for the device queue */
231 };
232 /* sc_flags */
233 #define RAIDF_INITED 0x01 /* unit has been initialized */
234 #define RAIDF_WLABEL 0x02 /* label area is writable */
235 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
236 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
237 #define RAIDF_LOCKED 0x80 /* unit is locked */
238
239 #define raidunit(x) DISKUNIT(x)
240 int numraid = 0;
241
242 /*
243 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
244 * Be aware that large numbers can allow the driver to consume a lot of
245 * kernel memory, especially on writes, and in degraded mode reads.
246 *
247 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
248 * a single 64K write will typically require 64K for the old data,
249 * 64K for the old parity, and 64K for the new parity, for a total
250 * of 192K (if the parity buffer is not re-used immediately).
251 * Even it if is used immediately, that's still 128K, which when multiplied
252 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
253 *
254 * Now in degraded mode, for example, a 64K read on the above setup may
255 * require data reconstruction, which will require *all* of the 4 remaining
256 * disks to participate -- 4 * 32K/disk == 128K again.
257 */
258
259 #ifndef RAIDOUTSTANDING
260 #define RAIDOUTSTANDING 6
261 #endif
262
263 #define RAIDLABELDEV(dev) \
264 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
265
266 /* declared here, and made public, for the benefit of KVM stuff.. */
267 struct raid_softc *raid_softc;
268
269 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
270 struct disklabel *);
271 static void raidgetdisklabel(dev_t);
272 static void raidmakedisklabel(struct raid_softc *);
273
274 static int raidlock(struct raid_softc *);
275 static void raidunlock(struct raid_softc *);
276
277 static void rf_markalldirty(RF_Raid_t *);
278 void rf_mountroot_hook(struct device *);
279
280 struct device *raidrootdev;
281
282 void rf_ReconThread(struct rf_recon_req *);
283 /* XXX what I want is: */
284 /*void rf_ReconThread(RF_Raid_t *raidPtr); */
285 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
286 void rf_CopybackThread(RF_Raid_t *raidPtr);
287 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
288 void rf_buildroothack(void *);
289
290 RF_AutoConfig_t *rf_find_raid_components(void);
291 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
292 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
293 static int rf_reasonable_label(RF_ComponentLabel_t *);
294 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
295 int rf_set_autoconfig(RF_Raid_t *, int);
296 int rf_set_rootpartition(RF_Raid_t *, int);
297 void rf_release_all_vps(RF_ConfigSet_t *);
298 void rf_cleanup_config_set(RF_ConfigSet_t *);
299 int rf_have_enough_components(RF_ConfigSet_t *);
300 int rf_auto_config_set(RF_ConfigSet_t *, int *);
301
302 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
303 allow autoconfig to take place.
304 Note that this is overridden by having
305 RAID_AUTOCONFIG as an option in the
306 kernel config file. */
307
308 void
309 raidattach(num)
310 int num;
311 {
312 int raidID;
313 int i, rc;
314 RF_AutoConfig_t *ac_list; /* autoconfig list */
315 RF_ConfigSet_t *config_sets;
316
317 #ifdef DEBUG
318 printf("raidattach: Asked for %d units\n", num);
319 #endif
320
321 if (num <= 0) {
322 #ifdef DIAGNOSTIC
323 panic("raidattach: count <= 0");
324 #endif
325 return;
326 }
327 /* This is where all the initialization stuff gets done. */
328
329 numraid = num;
330
331 /* Make some space for requested number of units... */
332
333 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
334 if (raidPtrs == NULL) {
335 panic("raidPtrs is NULL!!\n");
336 }
337
338 /* Initialize the component buffer pool. */
339 pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
340 0, 0, "raidpl", NULL);
341
342 rc = rf_mutex_init(&rf_sparet_wait_mutex);
343 if (rc) {
344 RF_PANIC();
345 }
346
347 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
348
349 for (i = 0; i < num; i++)
350 raidPtrs[i] = NULL;
351 rc = rf_BootRaidframe();
352 if (rc == 0)
353 printf("Kernelized RAIDframe activated\n");
354 else
355 panic("Serious error booting RAID!!\n");
356
357 /* put together some datastructures like the CCD device does.. This
358 * lets us lock the device and what-not when it gets opened. */
359
360 raid_softc = (struct raid_softc *)
361 malloc(num * sizeof(struct raid_softc),
362 M_RAIDFRAME, M_NOWAIT);
363 if (raid_softc == NULL) {
364 printf("WARNING: no memory for RAIDframe driver\n");
365 return;
366 }
367
368 memset(raid_softc, 0, num * sizeof(struct raid_softc));
369
370 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
371 M_RAIDFRAME, M_NOWAIT);
372 if (raidrootdev == NULL) {
373 panic("No memory for RAIDframe driver!!?!?!\n");
374 }
375
376 for (raidID = 0; raidID < num; raidID++) {
377 bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
378
379 raidrootdev[raidID].dv_class = DV_DISK;
380 raidrootdev[raidID].dv_cfdata = NULL;
381 raidrootdev[raidID].dv_unit = raidID;
382 raidrootdev[raidID].dv_parent = NULL;
383 raidrootdev[raidID].dv_flags = 0;
384 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
385
386 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
387 (RF_Raid_t *));
388 if (raidPtrs[raidID] == NULL) {
389 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
390 numraid = raidID;
391 return;
392 }
393 }
394
395 #ifdef RAID_AUTOCONFIG
396 raidautoconfig = 1;
397 #endif
398
399 if (raidautoconfig) {
400 /* 1. locate all RAID components on the system */
401
402 #if DEBUG
403 printf("Searching for raid components...\n");
404 #endif
405 ac_list = rf_find_raid_components();
406
407 /* 2. sort them into their respective sets */
408
409 config_sets = rf_create_auto_sets(ac_list);
410
411 /* 3. evaluate each set and configure the valid ones
412 This gets done in rf_buildroothack() */
413
414 /* schedule the creation of the thread to do the
415 "/ on RAID" stuff */
416
417 kthread_create(rf_buildroothack,config_sets);
418
419 #if 0
420 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
421 #endif
422 }
423
424 }
425
426 void
427 rf_buildroothack(arg)
428 void *arg;
429 {
430 RF_ConfigSet_t *config_sets = arg;
431 RF_ConfigSet_t *cset;
432 RF_ConfigSet_t *next_cset;
433 int retcode;
434 int raidID;
435 int rootID;
436 int num_root;
437
438 rootID = 0;
439 num_root = 0;
440 cset = config_sets;
441 while(cset != NULL ) {
442 next_cset = cset->next;
443 if (rf_have_enough_components(cset) &&
444 cset->ac->clabel->autoconfigure==1) {
445 retcode = rf_auto_config_set(cset,&raidID);
446 if (!retcode) {
447 if (cset->rootable) {
448 rootID = raidID;
449 num_root++;
450 }
451 } else {
452 /* The autoconfig didn't work :( */
453 #if DEBUG
454 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
455 #endif
456 rf_release_all_vps(cset);
457 }
458 } else {
459 /* we're not autoconfiguring this set...
460 release the associated resources */
461 rf_release_all_vps(cset);
462 }
463 /* cleanup */
464 rf_cleanup_config_set(cset);
465 cset = next_cset;
466 }
467
468 /* we found something bootable... */
469
470 if (num_root == 1) {
471 booted_device = &raidrootdev[rootID];
472 } else if (num_root > 1) {
473 /* we can't guess.. require the user to answer... */
474 boothowto |= RB_ASKNAME;
475 }
476 }
477
478
479 int
480 raidsize(dev)
481 dev_t dev;
482 {
483 struct raid_softc *rs;
484 struct disklabel *lp;
485 int part, unit, omask, size;
486
487 unit = raidunit(dev);
488 if (unit >= numraid)
489 return (-1);
490 rs = &raid_softc[unit];
491
492 if ((rs->sc_flags & RAIDF_INITED) == 0)
493 return (-1);
494
495 part = DISKPART(dev);
496 omask = rs->sc_dkdev.dk_openmask & (1 << part);
497 lp = rs->sc_dkdev.dk_label;
498
499 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
500 return (-1);
501
502 if (lp->d_partitions[part].p_fstype != FS_SWAP)
503 size = -1;
504 else
505 size = lp->d_partitions[part].p_size *
506 (lp->d_secsize / DEV_BSIZE);
507
508 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
509 return (-1);
510
511 return (size);
512
513 }
514
515 int
516 raiddump(dev, blkno, va, size)
517 dev_t dev;
518 daddr_t blkno;
519 caddr_t va;
520 size_t size;
521 {
522 /* Not implemented. */
523 return ENXIO;
524 }
525 /* ARGSUSED */
526 int
527 raidopen(dev, flags, fmt, p)
528 dev_t dev;
529 int flags, fmt;
530 struct proc *p;
531 {
532 int unit = raidunit(dev);
533 struct raid_softc *rs;
534 struct disklabel *lp;
535 int part, pmask;
536 int error = 0;
537
538 if (unit >= numraid)
539 return (ENXIO);
540 rs = &raid_softc[unit];
541
542 if ((error = raidlock(rs)) != 0)
543 return (error);
544 lp = rs->sc_dkdev.dk_label;
545
546 part = DISKPART(dev);
547 pmask = (1 << part);
548
549 db1_printf(("Opening raid device number: %d partition: %d\n",
550 unit, part));
551
552
553 if ((rs->sc_flags & RAIDF_INITED) &&
554 (rs->sc_dkdev.dk_openmask == 0))
555 raidgetdisklabel(dev);
556
557 /* make sure that this partition exists */
558
559 if (part != RAW_PART) {
560 db1_printf(("Not a raw partition..\n"));
561 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
562 ((part >= lp->d_npartitions) ||
563 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
564 error = ENXIO;
565 raidunlock(rs);
566 db1_printf(("Bailing out...\n"));
567 return (error);
568 }
569 }
570 /* Prevent this unit from being unconfigured while open. */
571 switch (fmt) {
572 case S_IFCHR:
573 rs->sc_dkdev.dk_copenmask |= pmask;
574 break;
575
576 case S_IFBLK:
577 rs->sc_dkdev.dk_bopenmask |= pmask;
578 break;
579 }
580
581 if ((rs->sc_dkdev.dk_openmask == 0) &&
582 ((rs->sc_flags & RAIDF_INITED) != 0)) {
583 /* First one... mark things as dirty... Note that we *MUST*
584 have done a configure before this. I DO NOT WANT TO BE
585 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
586 THAT THEY BELONG TOGETHER!!!!! */
587 /* XXX should check to see if we're only open for reading
588 here... If so, we needn't do this, but then need some
589 other way of keeping track of what's happened.. */
590
591 rf_markalldirty( raidPtrs[unit] );
592 }
593
594
595 rs->sc_dkdev.dk_openmask =
596 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
597
598 raidunlock(rs);
599
600 return (error);
601
602
603 }
604 /* ARGSUSED */
605 int
606 raidclose(dev, flags, fmt, p)
607 dev_t dev;
608 int flags, fmt;
609 struct proc *p;
610 {
611 int unit = raidunit(dev);
612 struct raid_softc *rs;
613 int error = 0;
614 int part;
615
616 if (unit >= numraid)
617 return (ENXIO);
618 rs = &raid_softc[unit];
619
620 if ((error = raidlock(rs)) != 0)
621 return (error);
622
623 part = DISKPART(dev);
624
625 /* ...that much closer to allowing unconfiguration... */
626 switch (fmt) {
627 case S_IFCHR:
628 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
629 break;
630
631 case S_IFBLK:
632 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
633 break;
634 }
635 rs->sc_dkdev.dk_openmask =
636 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
637
638 if ((rs->sc_dkdev.dk_openmask == 0) &&
639 ((rs->sc_flags & RAIDF_INITED) != 0)) {
640 /* Last one... device is not unconfigured yet.
641 Device shutdown has taken care of setting the
642 clean bits if RAIDF_INITED is not set
643 mark things as clean... */
644 #if 0
645 printf("Last one on raid%d. Updating status.\n",unit);
646 #endif
647 rf_update_component_labels(raidPtrs[unit],
648 RF_FINAL_COMPONENT_UPDATE);
649 if (doing_shutdown) {
650 /* last one, and we're going down, so
651 lights out for this RAID set too. */
652 error = rf_Shutdown(raidPtrs[unit]);
653
654 /* It's no longer initialized... */
655 rs->sc_flags &= ~RAIDF_INITED;
656
657 /* Detach the disk. */
658 disk_detach(&rs->sc_dkdev);
659 }
660 }
661
662 raidunlock(rs);
663 return (0);
664
665 }
666
667 void
668 raidstrategy(bp)
669 struct buf *bp;
670 {
671 int s;
672
673 unsigned int raidID = raidunit(bp->b_dev);
674 RF_Raid_t *raidPtr;
675 struct raid_softc *rs = &raid_softc[raidID];
676 struct disklabel *lp;
677 int wlabel;
678
679 if ((rs->sc_flags & RAIDF_INITED) ==0) {
680 bp->b_error = ENXIO;
681 bp->b_flags |= B_ERROR;
682 bp->b_resid = bp->b_bcount;
683 biodone(bp);
684 return;
685 }
686 if (raidID >= numraid || !raidPtrs[raidID]) {
687 bp->b_error = ENODEV;
688 bp->b_flags |= B_ERROR;
689 bp->b_resid = bp->b_bcount;
690 biodone(bp);
691 return;
692 }
693 raidPtr = raidPtrs[raidID];
694 if (!raidPtr->valid) {
695 bp->b_error = ENODEV;
696 bp->b_flags |= B_ERROR;
697 bp->b_resid = bp->b_bcount;
698 biodone(bp);
699 return;
700 }
701 if (bp->b_bcount == 0) {
702 db1_printf(("b_bcount is zero..\n"));
703 biodone(bp);
704 return;
705 }
706 lp = rs->sc_dkdev.dk_label;
707
708 /*
709 * Do bounds checking and adjust transfer. If there's an
710 * error, the bounds check will flag that for us.
711 */
712
713 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
714 if (DISKPART(bp->b_dev) != RAW_PART)
715 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
716 db1_printf(("Bounds check failed!!:%d %d\n",
717 (int) bp->b_blkno, (int) wlabel));
718 biodone(bp);
719 return;
720 }
721 s = splbio();
722
723 bp->b_resid = 0;
724
725 /* stuff it onto our queue */
726 BUFQ_PUT(&rs->buf_queue, bp);
727
728 raidstart(raidPtrs[raidID]);
729
730 splx(s);
731 }
732 /* ARGSUSED */
733 int
734 raidread(dev, uio, flags)
735 dev_t dev;
736 struct uio *uio;
737 int flags;
738 {
739 int unit = raidunit(dev);
740 struct raid_softc *rs;
741 int part;
742
743 if (unit >= numraid)
744 return (ENXIO);
745 rs = &raid_softc[unit];
746
747 if ((rs->sc_flags & RAIDF_INITED) == 0)
748 return (ENXIO);
749 part = DISKPART(dev);
750
751 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
752
753 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
754
755 }
756 /* ARGSUSED */
757 int
758 raidwrite(dev, uio, flags)
759 dev_t dev;
760 struct uio *uio;
761 int flags;
762 {
763 int unit = raidunit(dev);
764 struct raid_softc *rs;
765
766 if (unit >= numraid)
767 return (ENXIO);
768 rs = &raid_softc[unit];
769
770 if ((rs->sc_flags & RAIDF_INITED) == 0)
771 return (ENXIO);
772 db1_printf(("raidwrite\n"));
773 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
774
775 }
776
777 int
778 raidioctl(dev, cmd, data, flag, p)
779 dev_t dev;
780 u_long cmd;
781 caddr_t data;
782 int flag;
783 struct proc *p;
784 {
785 int unit = raidunit(dev);
786 int error = 0;
787 int part, pmask;
788 struct raid_softc *rs;
789 RF_Config_t *k_cfg, *u_cfg;
790 RF_Raid_t *raidPtr;
791 RF_RaidDisk_t *diskPtr;
792 RF_AccTotals_t *totals;
793 RF_DeviceConfig_t *d_cfg, **ucfgp;
794 u_char *specific_buf;
795 int retcode = 0;
796 int row;
797 int column;
798 int raidid;
799 struct rf_recon_req *rrcopy, *rr;
800 RF_ComponentLabel_t *clabel;
801 RF_ComponentLabel_t ci_label;
802 RF_ComponentLabel_t **clabel_ptr;
803 RF_SingleComponent_t *sparePtr,*componentPtr;
804 RF_SingleComponent_t hot_spare;
805 RF_SingleComponent_t component;
806 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
807 int i, j, d;
808 #ifdef __HAVE_OLD_DISKLABEL
809 struct disklabel newlabel;
810 #endif
811
812 if (unit >= numraid)
813 return (ENXIO);
814 rs = &raid_softc[unit];
815 raidPtr = raidPtrs[unit];
816
817 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
818 (int) DISKPART(dev), (int) unit, (int) cmd));
819
820 /* Must be open for writes for these commands... */
821 switch (cmd) {
822 case DIOCSDINFO:
823 case DIOCWDINFO:
824 #ifdef __HAVE_OLD_DISKLABEL
825 case ODIOCWDINFO:
826 case ODIOCSDINFO:
827 #endif
828 case DIOCWLABEL:
829 if ((flag & FWRITE) == 0)
830 return (EBADF);
831 }
832
833 /* Must be initialized for these... */
834 switch (cmd) {
835 case DIOCGDINFO:
836 case DIOCSDINFO:
837 case DIOCWDINFO:
838 #ifdef __HAVE_OLD_DISKLABEL
839 case ODIOCGDINFO:
840 case ODIOCWDINFO:
841 case ODIOCSDINFO:
842 case ODIOCGDEFLABEL:
843 #endif
844 case DIOCGPART:
845 case DIOCWLABEL:
846 case DIOCGDEFLABEL:
847 case RAIDFRAME_SHUTDOWN:
848 case RAIDFRAME_REWRITEPARITY:
849 case RAIDFRAME_GET_INFO:
850 case RAIDFRAME_RESET_ACCTOTALS:
851 case RAIDFRAME_GET_ACCTOTALS:
852 case RAIDFRAME_KEEP_ACCTOTALS:
853 case RAIDFRAME_GET_SIZE:
854 case RAIDFRAME_FAIL_DISK:
855 case RAIDFRAME_COPYBACK:
856 case RAIDFRAME_CHECK_RECON_STATUS:
857 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
858 case RAIDFRAME_GET_COMPONENT_LABEL:
859 case RAIDFRAME_SET_COMPONENT_LABEL:
860 case RAIDFRAME_ADD_HOT_SPARE:
861 case RAIDFRAME_REMOVE_HOT_SPARE:
862 case RAIDFRAME_INIT_LABELS:
863 case RAIDFRAME_REBUILD_IN_PLACE:
864 case RAIDFRAME_CHECK_PARITY:
865 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
866 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
867 case RAIDFRAME_CHECK_COPYBACK_STATUS:
868 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
869 case RAIDFRAME_SET_AUTOCONFIG:
870 case RAIDFRAME_SET_ROOT:
871 case RAIDFRAME_DELETE_COMPONENT:
872 case RAIDFRAME_INCORPORATE_HOT_SPARE:
873 if ((rs->sc_flags & RAIDF_INITED) == 0)
874 return (ENXIO);
875 }
876
877 switch (cmd) {
878
879 /* configure the system */
880 case RAIDFRAME_CONFIGURE:
881
882 if (raidPtr->valid) {
883 /* There is a valid RAID set running on this unit! */
884 printf("raid%d: Device already configured!\n",unit);
885 return(EINVAL);
886 }
887
888 /* copy-in the configuration information */
889 /* data points to a pointer to the configuration structure */
890
891 u_cfg = *((RF_Config_t **) data);
892 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
893 if (k_cfg == NULL) {
894 return (ENOMEM);
895 }
896 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
897 sizeof(RF_Config_t));
898 if (retcode) {
899 RF_Free(k_cfg, sizeof(RF_Config_t));
900 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
901 retcode));
902 return (retcode);
903 }
904 /* allocate a buffer for the layout-specific data, and copy it
905 * in */
906 if (k_cfg->layoutSpecificSize) {
907 if (k_cfg->layoutSpecificSize > 10000) {
908 /* sanity check */
909 RF_Free(k_cfg, sizeof(RF_Config_t));
910 return (EINVAL);
911 }
912 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
913 (u_char *));
914 if (specific_buf == NULL) {
915 RF_Free(k_cfg, sizeof(RF_Config_t));
916 return (ENOMEM);
917 }
918 retcode = copyin(k_cfg->layoutSpecific,
919 (caddr_t) specific_buf,
920 k_cfg->layoutSpecificSize);
921 if (retcode) {
922 RF_Free(k_cfg, sizeof(RF_Config_t));
923 RF_Free(specific_buf,
924 k_cfg->layoutSpecificSize);
925 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
926 retcode));
927 return (retcode);
928 }
929 } else
930 specific_buf = NULL;
931 k_cfg->layoutSpecific = specific_buf;
932
933 /* should do some kind of sanity check on the configuration.
934 * Store the sum of all the bytes in the last byte? */
935
936 /* configure the system */
937
938 /*
939 * Clear the entire RAID descriptor, just to make sure
940 * there is no stale data left in the case of a
941 * reconfiguration
942 */
943 memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
944 raidPtr->raidid = unit;
945
946 retcode = rf_Configure(raidPtr, k_cfg, NULL);
947
948 if (retcode == 0) {
949
950 /* allow this many simultaneous IO's to
951 this RAID device */
952 raidPtr->openings = RAIDOUTSTANDING;
953
954 raidinit(raidPtr);
955 rf_markalldirty(raidPtr);
956 }
957 /* free the buffers. No return code here. */
958 if (k_cfg->layoutSpecificSize) {
959 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
960 }
961 RF_Free(k_cfg, sizeof(RF_Config_t));
962
963 return (retcode);
964
965 /* shutdown the system */
966 case RAIDFRAME_SHUTDOWN:
967
968 if ((error = raidlock(rs)) != 0)
969 return (error);
970
971 /*
972 * If somebody has a partition mounted, we shouldn't
973 * shutdown.
974 */
975
976 part = DISKPART(dev);
977 pmask = (1 << part);
978 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
979 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
980 (rs->sc_dkdev.dk_copenmask & pmask))) {
981 raidunlock(rs);
982 return (EBUSY);
983 }
984
985 retcode = rf_Shutdown(raidPtr);
986
987 /* It's no longer initialized... */
988 rs->sc_flags &= ~RAIDF_INITED;
989
990 /* Detach the disk. */
991 disk_detach(&rs->sc_dkdev);
992
993 raidunlock(rs);
994
995 return (retcode);
996 case RAIDFRAME_GET_COMPONENT_LABEL:
997 clabel_ptr = (RF_ComponentLabel_t **) data;
998 /* need to read the component label for the disk indicated
999 by row,column in clabel */
1000
1001 /* For practice, let's get it directly fromdisk, rather
1002 than from the in-core copy */
1003 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1004 (RF_ComponentLabel_t *));
1005 if (clabel == NULL)
1006 return (ENOMEM);
1007
1008 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1009
1010 retcode = copyin( *clabel_ptr, clabel,
1011 sizeof(RF_ComponentLabel_t));
1012
1013 if (retcode) {
1014 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1015 return(retcode);
1016 }
1017
1018 row = clabel->row;
1019 column = clabel->column;
1020
1021 if ((row < 0) || (row >= raidPtr->numRow) ||
1022 (column < 0) || (column >= raidPtr->numCol +
1023 raidPtr->numSpare)) {
1024 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1025 return(EINVAL);
1026 }
1027
1028 raidread_component_label(raidPtr->Disks[row][column].dev,
1029 raidPtr->raid_cinfo[row][column].ci_vp,
1030 clabel );
1031
1032 retcode = copyout((caddr_t) clabel,
1033 (caddr_t) *clabel_ptr,
1034 sizeof(RF_ComponentLabel_t));
1035 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1036 return (retcode);
1037
1038 case RAIDFRAME_SET_COMPONENT_LABEL:
1039 clabel = (RF_ComponentLabel_t *) data;
1040
1041 /* XXX check the label for valid stuff... */
1042 /* Note that some things *should not* get modified --
1043 the user should be re-initing the labels instead of
1044 trying to patch things.
1045 */
1046
1047 raidid = raidPtr->raidid;
1048 printf("raid%d: Got component label:\n", raidid);
1049 printf("raid%d: Version: %d\n", raidid, clabel->version);
1050 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1051 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1052 printf("raid%d: Row: %d\n", raidid, clabel->row);
1053 printf("raid%d: Column: %d\n", raidid, clabel->column);
1054 printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
1055 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1056 printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1057 printf("raid%d: Status: %d\n", raidid, clabel->status);
1058
1059 row = clabel->row;
1060 column = clabel->column;
1061
1062 if ((row < 0) || (row >= raidPtr->numRow) ||
1063 (column < 0) || (column >= raidPtr->numCol)) {
1064 return(EINVAL);
1065 }
1066
1067 /* XXX this isn't allowed to do anything for now :-) */
1068
1069 /* XXX and before it is, we need to fill in the rest
1070 of the fields!?!?!?! */
1071 #if 0
1072 raidwrite_component_label(
1073 raidPtr->Disks[row][column].dev,
1074 raidPtr->raid_cinfo[row][column].ci_vp,
1075 clabel );
1076 #endif
1077 return (0);
1078
1079 case RAIDFRAME_INIT_LABELS:
1080 clabel = (RF_ComponentLabel_t *) data;
1081 /*
1082 we only want the serial number from
1083 the above. We get all the rest of the information
1084 from the config that was used to create this RAID
1085 set.
1086 */
1087
1088 raidPtr->serial_number = clabel->serial_number;
1089
1090 raid_init_component_label(raidPtr, &ci_label);
1091 ci_label.serial_number = clabel->serial_number;
1092
1093 for(row=0;row<raidPtr->numRow;row++) {
1094 ci_label.row = row;
1095 for(column=0;column<raidPtr->numCol;column++) {
1096 diskPtr = &raidPtr->Disks[row][column];
1097 if (!RF_DEAD_DISK(diskPtr->status)) {
1098 ci_label.partitionSize = diskPtr->partitionSize;
1099 ci_label.column = column;
1100 raidwrite_component_label(
1101 raidPtr->Disks[row][column].dev,
1102 raidPtr->raid_cinfo[row][column].ci_vp,
1103 &ci_label );
1104 }
1105 }
1106 }
1107
1108 return (retcode);
1109 case RAIDFRAME_SET_AUTOCONFIG:
1110 d = rf_set_autoconfig(raidPtr, *(int *) data);
1111 printf("raid%d: New autoconfig value is: %d\n",
1112 raidPtr->raidid, d);
1113 *(int *) data = d;
1114 return (retcode);
1115
1116 case RAIDFRAME_SET_ROOT:
1117 d = rf_set_rootpartition(raidPtr, *(int *) data);
1118 printf("raid%d: New rootpartition value is: %d\n",
1119 raidPtr->raidid, d);
1120 *(int *) data = d;
1121 return (retcode);
1122
1123 /* initialize all parity */
1124 case RAIDFRAME_REWRITEPARITY:
1125
1126 if (raidPtr->Layout.map->faultsTolerated == 0) {
1127 /* Parity for RAID 0 is trivially correct */
1128 raidPtr->parity_good = RF_RAID_CLEAN;
1129 return(0);
1130 }
1131
1132 if (raidPtr->parity_rewrite_in_progress == 1) {
1133 /* Re-write is already in progress! */
1134 return(EINVAL);
1135 }
1136
1137 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1138 rf_RewriteParityThread,
1139 raidPtr,"raid_parity");
1140 return (retcode);
1141
1142
1143 case RAIDFRAME_ADD_HOT_SPARE:
1144 sparePtr = (RF_SingleComponent_t *) data;
1145 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1146 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1147 return(retcode);
1148
1149 case RAIDFRAME_REMOVE_HOT_SPARE:
1150 return(retcode);
1151
1152 case RAIDFRAME_DELETE_COMPONENT:
1153 componentPtr = (RF_SingleComponent_t *)data;
1154 memcpy( &component, componentPtr,
1155 sizeof(RF_SingleComponent_t));
1156 retcode = rf_delete_component(raidPtr, &component);
1157 return(retcode);
1158
1159 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1160 componentPtr = (RF_SingleComponent_t *)data;
1161 memcpy( &component, componentPtr,
1162 sizeof(RF_SingleComponent_t));
1163 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1164 return(retcode);
1165
1166 case RAIDFRAME_REBUILD_IN_PLACE:
1167
1168 if (raidPtr->Layout.map->faultsTolerated == 0) {
1169 /* Can't do this on a RAID 0!! */
1170 return(EINVAL);
1171 }
1172
1173 if (raidPtr->recon_in_progress == 1) {
1174 /* a reconstruct is already in progress! */
1175 return(EINVAL);
1176 }
1177
1178 componentPtr = (RF_SingleComponent_t *) data;
1179 memcpy( &component, componentPtr,
1180 sizeof(RF_SingleComponent_t));
1181 row = component.row;
1182 column = component.column;
1183 printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
1184 row, column);
1185 if ((row < 0) || (row >= raidPtr->numRow) ||
1186 (column < 0) || (column >= raidPtr->numCol)) {
1187 return(EINVAL);
1188 }
1189
1190 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1191 if (rrcopy == NULL)
1192 return(ENOMEM);
1193
1194 rrcopy->raidPtr = (void *) raidPtr;
1195 rrcopy->row = row;
1196 rrcopy->col = column;
1197
1198 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1199 rf_ReconstructInPlaceThread,
1200 rrcopy,"raid_reconip");
1201 return(retcode);
1202
1203 case RAIDFRAME_GET_INFO:
1204 if (!raidPtr->valid)
1205 return (ENODEV);
1206 ucfgp = (RF_DeviceConfig_t **) data;
1207 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1208 (RF_DeviceConfig_t *));
1209 if (d_cfg == NULL)
1210 return (ENOMEM);
1211 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1212 d_cfg->rows = raidPtr->numRow;
1213 d_cfg->cols = raidPtr->numCol;
1214 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1215 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1216 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1217 return (ENOMEM);
1218 }
1219 d_cfg->nspares = raidPtr->numSpare;
1220 if (d_cfg->nspares >= RF_MAX_DISKS) {
1221 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1222 return (ENOMEM);
1223 }
1224 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1225 d = 0;
1226 for (i = 0; i < d_cfg->rows; i++) {
1227 for (j = 0; j < d_cfg->cols; j++) {
1228 d_cfg->devs[d] = raidPtr->Disks[i][j];
1229 d++;
1230 }
1231 }
1232 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1233 d_cfg->spares[i] = raidPtr->Disks[0][j];
1234 }
1235 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1236 sizeof(RF_DeviceConfig_t));
1237 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1238
1239 return (retcode);
1240
1241 case RAIDFRAME_CHECK_PARITY:
1242 *(int *) data = raidPtr->parity_good;
1243 return (0);
1244
1245 case RAIDFRAME_RESET_ACCTOTALS:
1246 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1247 return (0);
1248
1249 case RAIDFRAME_GET_ACCTOTALS:
1250 totals = (RF_AccTotals_t *) data;
1251 *totals = raidPtr->acc_totals;
1252 return (0);
1253
1254 case RAIDFRAME_KEEP_ACCTOTALS:
1255 raidPtr->keep_acc_totals = *(int *)data;
1256 return (0);
1257
1258 case RAIDFRAME_GET_SIZE:
1259 *(int *) data = raidPtr->totalSectors;
1260 return (0);
1261
1262 /* fail a disk & optionally start reconstruction */
1263 case RAIDFRAME_FAIL_DISK:
1264
1265 if (raidPtr->Layout.map->faultsTolerated == 0) {
1266 /* Can't do this on a RAID 0!! */
1267 return(EINVAL);
1268 }
1269
1270 rr = (struct rf_recon_req *) data;
1271
1272 if (rr->row < 0 || rr->row >= raidPtr->numRow
1273 || rr->col < 0 || rr->col >= raidPtr->numCol)
1274 return (EINVAL);
1275
1276 printf("raid%d: Failing the disk: row: %d col: %d\n",
1277 unit, rr->row, rr->col);
1278
1279 /* make a copy of the recon request so that we don't rely on
1280 * the user's buffer */
1281 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1282 if (rrcopy == NULL)
1283 return(ENOMEM);
1284 memcpy(rrcopy, rr, sizeof(*rr));
1285 rrcopy->raidPtr = (void *) raidPtr;
1286
1287 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1288 rf_ReconThread,
1289 rrcopy,"raid_recon");
1290 return (0);
1291
1292 /* invoke a copyback operation after recon on whatever disk
1293 * needs it, if any */
1294 case RAIDFRAME_COPYBACK:
1295
1296 if (raidPtr->Layout.map->faultsTolerated == 0) {
1297 /* This makes no sense on a RAID 0!! */
1298 return(EINVAL);
1299 }
1300
1301 if (raidPtr->copyback_in_progress == 1) {
1302 /* Copyback is already in progress! */
1303 return(EINVAL);
1304 }
1305
1306 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1307 rf_CopybackThread,
1308 raidPtr,"raid_copyback");
1309 return (retcode);
1310
1311 /* return the percentage completion of reconstruction */
1312 case RAIDFRAME_CHECK_RECON_STATUS:
1313 if (raidPtr->Layout.map->faultsTolerated == 0) {
1314 /* This makes no sense on a RAID 0, so tell the
1315 user it's done. */
1316 *(int *) data = 100;
1317 return(0);
1318 }
1319 row = 0; /* XXX we only consider a single row... */
1320 if (raidPtr->status[row] != rf_rs_reconstructing)
1321 *(int *) data = 100;
1322 else
1323 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1324 return (0);
1325 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1326 progressInfoPtr = (RF_ProgressInfo_t **) data;
1327 row = 0; /* XXX we only consider a single row... */
1328 if (raidPtr->status[row] != rf_rs_reconstructing) {
1329 progressInfo.remaining = 0;
1330 progressInfo.completed = 100;
1331 progressInfo.total = 100;
1332 } else {
1333 progressInfo.total =
1334 raidPtr->reconControl[row]->numRUsTotal;
1335 progressInfo.completed =
1336 raidPtr->reconControl[row]->numRUsComplete;
1337 progressInfo.remaining = progressInfo.total -
1338 progressInfo.completed;
1339 }
1340 retcode = copyout((caddr_t) &progressInfo,
1341 (caddr_t) *progressInfoPtr,
1342 sizeof(RF_ProgressInfo_t));
1343 return (retcode);
1344
1345 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1346 if (raidPtr->Layout.map->faultsTolerated == 0) {
1347 /* This makes no sense on a RAID 0, so tell the
1348 user it's done. */
1349 *(int *) data = 100;
1350 return(0);
1351 }
1352 if (raidPtr->parity_rewrite_in_progress == 1) {
1353 *(int *) data = 100 *
1354 raidPtr->parity_rewrite_stripes_done /
1355 raidPtr->Layout.numStripe;
1356 } else {
1357 *(int *) data = 100;
1358 }
1359 return (0);
1360
1361 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1362 progressInfoPtr = (RF_ProgressInfo_t **) data;
1363 if (raidPtr->parity_rewrite_in_progress == 1) {
1364 progressInfo.total = raidPtr->Layout.numStripe;
1365 progressInfo.completed =
1366 raidPtr->parity_rewrite_stripes_done;
1367 progressInfo.remaining = progressInfo.total -
1368 progressInfo.completed;
1369 } else {
1370 progressInfo.remaining = 0;
1371 progressInfo.completed = 100;
1372 progressInfo.total = 100;
1373 }
1374 retcode = copyout((caddr_t) &progressInfo,
1375 (caddr_t) *progressInfoPtr,
1376 sizeof(RF_ProgressInfo_t));
1377 return (retcode);
1378
1379 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1380 if (raidPtr->Layout.map->faultsTolerated == 0) {
1381 /* This makes no sense on a RAID 0 */
1382 *(int *) data = 100;
1383 return(0);
1384 }
1385 if (raidPtr->copyback_in_progress == 1) {
1386 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1387 raidPtr->Layout.numStripe;
1388 } else {
1389 *(int *) data = 100;
1390 }
1391 return (0);
1392
1393 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1394 progressInfoPtr = (RF_ProgressInfo_t **) data;
1395 if (raidPtr->copyback_in_progress == 1) {
1396 progressInfo.total = raidPtr->Layout.numStripe;
1397 progressInfo.completed =
1398 raidPtr->copyback_stripes_done;
1399 progressInfo.remaining = progressInfo.total -
1400 progressInfo.completed;
1401 } else {
1402 progressInfo.remaining = 0;
1403 progressInfo.completed = 100;
1404 progressInfo.total = 100;
1405 }
1406 retcode = copyout((caddr_t) &progressInfo,
1407 (caddr_t) *progressInfoPtr,
1408 sizeof(RF_ProgressInfo_t));
1409 return (retcode);
1410
1411 /* the sparetable daemon calls this to wait for the kernel to
1412 * need a spare table. this ioctl does not return until a
1413 * spare table is needed. XXX -- calling mpsleep here in the
1414 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1415 * -- I should either compute the spare table in the kernel,
1416 * or have a different -- XXX XXX -- interface (a different
1417 * character device) for delivering the table -- XXX */
1418 #if 0
1419 case RAIDFRAME_SPARET_WAIT:
1420 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1421 while (!rf_sparet_wait_queue)
1422 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1423 waitreq = rf_sparet_wait_queue;
1424 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1425 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1426
1427 /* structure assignment */
1428 *((RF_SparetWait_t *) data) = *waitreq;
1429
1430 RF_Free(waitreq, sizeof(*waitreq));
1431 return (0);
1432
1433 /* wakes up a process waiting on SPARET_WAIT and puts an error
1434 * code in it that will cause the dameon to exit */
1435 case RAIDFRAME_ABORT_SPARET_WAIT:
1436 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1437 waitreq->fcol = -1;
1438 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1439 waitreq->next = rf_sparet_wait_queue;
1440 rf_sparet_wait_queue = waitreq;
1441 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1442 wakeup(&rf_sparet_wait_queue);
1443 return (0);
1444
1445 /* used by the spare table daemon to deliver a spare table
1446 * into the kernel */
1447 case RAIDFRAME_SEND_SPARET:
1448
1449 /* install the spare table */
1450 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1451
1452 /* respond to the requestor. the return status of the spare
1453 * table installation is passed in the "fcol" field */
1454 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1455 waitreq->fcol = retcode;
1456 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1457 waitreq->next = rf_sparet_resp_queue;
1458 rf_sparet_resp_queue = waitreq;
1459 wakeup(&rf_sparet_resp_queue);
1460 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1461
1462 return (retcode);
1463 #endif
1464
1465 default:
1466 break; /* fall through to the os-specific code below */
1467
1468 }
1469
1470 if (!raidPtr->valid)
1471 return (EINVAL);
1472
1473 /*
1474 * Add support for "regular" device ioctls here.
1475 */
1476
1477 switch (cmd) {
1478 case DIOCGDINFO:
1479 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1480 break;
1481 #ifdef __HAVE_OLD_DISKLABEL
1482 case ODIOCGDINFO:
1483 newlabel = *(rs->sc_dkdev.dk_label);
1484 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1485 return ENOTTY;
1486 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1487 break;
1488 #endif
1489
1490 case DIOCGPART:
1491 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1492 ((struct partinfo *) data)->part =
1493 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1494 break;
1495
1496 case DIOCWDINFO:
1497 case DIOCSDINFO:
1498 #ifdef __HAVE_OLD_DISKLABEL
1499 case ODIOCWDINFO:
1500 case ODIOCSDINFO:
1501 #endif
1502 {
1503 struct disklabel *lp;
1504 #ifdef __HAVE_OLD_DISKLABEL
1505 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1506 memset(&newlabel, 0, sizeof newlabel);
1507 memcpy(&newlabel, data, sizeof (struct olddisklabel));
1508 lp = &newlabel;
1509 } else
1510 #endif
1511 lp = (struct disklabel *)data;
1512
1513 if ((error = raidlock(rs)) != 0)
1514 return (error);
1515
1516 rs->sc_flags |= RAIDF_LABELLING;
1517
1518 error = setdisklabel(rs->sc_dkdev.dk_label,
1519 lp, 0, rs->sc_dkdev.dk_cpulabel);
1520 if (error == 0) {
1521 if (cmd == DIOCWDINFO
1522 #ifdef __HAVE_OLD_DISKLABEL
1523 || cmd == ODIOCWDINFO
1524 #endif
1525 )
1526 error = writedisklabel(RAIDLABELDEV(dev),
1527 raidstrategy, rs->sc_dkdev.dk_label,
1528 rs->sc_dkdev.dk_cpulabel);
1529 }
1530 rs->sc_flags &= ~RAIDF_LABELLING;
1531
1532 raidunlock(rs);
1533
1534 if (error)
1535 return (error);
1536 break;
1537 }
1538
1539 case DIOCWLABEL:
1540 if (*(int *) data != 0)
1541 rs->sc_flags |= RAIDF_WLABEL;
1542 else
1543 rs->sc_flags &= ~RAIDF_WLABEL;
1544 break;
1545
1546 case DIOCGDEFLABEL:
1547 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1548 break;
1549
1550 #ifdef __HAVE_OLD_DISKLABEL
1551 case ODIOCGDEFLABEL:
1552 raidgetdefaultlabel(raidPtr, rs, &newlabel);
1553 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1554 return ENOTTY;
1555 memcpy(data, &newlabel, sizeof (struct olddisklabel));
1556 break;
1557 #endif
1558
1559 default:
1560 retcode = ENOTTY;
1561 }
1562 return (retcode);
1563
1564 }
1565
1566
1567 /* raidinit -- complete the rest of the initialization for the
1568 RAIDframe device. */
1569
1570
1571 static void
1572 raidinit(raidPtr)
1573 RF_Raid_t *raidPtr;
1574 {
1575 struct raid_softc *rs;
1576 int unit;
1577
1578 unit = raidPtr->raidid;
1579
1580 rs = &raid_softc[unit];
1581
1582 /* XXX should check return code first... */
1583 rs->sc_flags |= RAIDF_INITED;
1584
1585 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1586
1587 rs->sc_dkdev.dk_name = rs->sc_xname;
1588
1589 /* disk_attach actually creates space for the CPU disklabel, among
1590 * other things, so it's critical to call this *BEFORE* we try putzing
1591 * with disklabels. */
1592
1593 disk_attach(&rs->sc_dkdev);
1594
1595 /* XXX There may be a weird interaction here between this, and
1596 * protectedSectors, as used in RAIDframe. */
1597
1598 rs->sc_size = raidPtr->totalSectors;
1599
1600 }
1601
1602 /* wake up the daemon & tell it to get us a spare table
1603 * XXX
1604 * the entries in the queues should be tagged with the raidPtr
1605 * so that in the extremely rare case that two recons happen at once,
1606 * we know for which device were requesting a spare table
1607 * XXX
1608 *
1609 * XXX This code is not currently used. GO
1610 */
1611 int
1612 rf_GetSpareTableFromDaemon(req)
1613 RF_SparetWait_t *req;
1614 {
1615 int retcode;
1616
1617 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1618 req->next = rf_sparet_wait_queue;
1619 rf_sparet_wait_queue = req;
1620 wakeup(&rf_sparet_wait_queue);
1621
1622 /* mpsleep unlocks the mutex */
1623 while (!rf_sparet_resp_queue) {
1624 tsleep(&rf_sparet_resp_queue, PRIBIO,
1625 "raidframe getsparetable", 0);
1626 }
1627 req = rf_sparet_resp_queue;
1628 rf_sparet_resp_queue = req->next;
1629 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1630
1631 retcode = req->fcol;
1632 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1633 * alloc'd */
1634 return (retcode);
1635 }
1636
1637 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1638 * bp & passes it down.
1639 * any calls originating in the kernel must use non-blocking I/O
1640 * do some extra sanity checking to return "appropriate" error values for
1641 * certain conditions (to make some standard utilities work)
1642 *
1643 * Formerly known as: rf_DoAccessKernel
1644 */
1645 void
1646 raidstart(raidPtr)
1647 RF_Raid_t *raidPtr;
1648 {
1649 RF_SectorCount_t num_blocks, pb, sum;
1650 RF_RaidAddr_t raid_addr;
1651 int retcode;
1652 struct partition *pp;
1653 daddr_t blocknum;
1654 int unit;
1655 struct raid_softc *rs;
1656 int do_async;
1657 struct buf *bp;
1658
1659 unit = raidPtr->raidid;
1660 rs = &raid_softc[unit];
1661
1662 /* quick check to see if anything has died recently */
1663 RF_LOCK_MUTEX(raidPtr->mutex);
1664 if (raidPtr->numNewFailures > 0) {
1665 rf_update_component_labels(raidPtr,
1666 RF_NORMAL_COMPONENT_UPDATE);
1667 raidPtr->numNewFailures--;
1668 }
1669
1670 /* Check to see if we're at the limit... */
1671 while (raidPtr->openings > 0) {
1672 RF_UNLOCK_MUTEX(raidPtr->mutex);
1673
1674 /* get the next item, if any, from the queue */
1675 if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
1676 /* nothing more to do */
1677 return;
1678 }
1679
1680 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1681 * partition.. Need to make it absolute to the underlying
1682 * device.. */
1683
1684 blocknum = bp->b_blkno;
1685 if (DISKPART(bp->b_dev) != RAW_PART) {
1686 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1687 blocknum += pp->p_offset;
1688 }
1689
1690 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1691 (int) blocknum));
1692
1693 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1694 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1695
1696 /* *THIS* is where we adjust what block we're going to...
1697 * but DO NOT TOUCH bp->b_blkno!!! */
1698 raid_addr = blocknum;
1699
1700 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1701 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1702 sum = raid_addr + num_blocks + pb;
1703 if (1 || rf_debugKernelAccess) {
1704 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1705 (int) raid_addr, (int) sum, (int) num_blocks,
1706 (int) pb, (int) bp->b_resid));
1707 }
1708 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1709 || (sum < num_blocks) || (sum < pb)) {
1710 bp->b_error = ENOSPC;
1711 bp->b_flags |= B_ERROR;
1712 bp->b_resid = bp->b_bcount;
1713 biodone(bp);
1714 RF_LOCK_MUTEX(raidPtr->mutex);
1715 continue;
1716 }
1717 /*
1718 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1719 */
1720
1721 if (bp->b_bcount & raidPtr->sectorMask) {
1722 bp->b_error = EINVAL;
1723 bp->b_flags |= B_ERROR;
1724 bp->b_resid = bp->b_bcount;
1725 biodone(bp);
1726 RF_LOCK_MUTEX(raidPtr->mutex);
1727 continue;
1728
1729 }
1730 db1_printf(("Calling DoAccess..\n"));
1731
1732
1733 RF_LOCK_MUTEX(raidPtr->mutex);
1734 raidPtr->openings--;
1735 RF_UNLOCK_MUTEX(raidPtr->mutex);
1736
1737 /*
1738 * Everything is async.
1739 */
1740 do_async = 1;
1741
1742 disk_busy(&rs->sc_dkdev);
1743
1744 /* XXX we're still at splbio() here... do we *really*
1745 need to be? */
1746
1747 /* don't ever condition on bp->b_flags & B_WRITE.
1748 * always condition on B_READ instead */
1749
1750 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1751 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1752 do_async, raid_addr, num_blocks,
1753 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1754
1755 RF_LOCK_MUTEX(raidPtr->mutex);
1756 }
1757 RF_UNLOCK_MUTEX(raidPtr->mutex);
1758 }
1759
1760
1761
1762
1763 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1764
1765 int
1766 rf_DispatchKernelIO(queue, req)
1767 RF_DiskQueue_t *queue;
1768 RF_DiskQueueData_t *req;
1769 {
1770 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1771 struct buf *bp;
1772 struct raidbuf *raidbp = NULL;
1773 struct raid_softc *rs;
1774 int unit;
1775 int s;
1776
1777 s=0;
1778 /* s = splbio();*/ /* want to test this */
1779 /* XXX along with the vnode, we also need the softc associated with
1780 * this device.. */
1781
1782 req->queue = queue;
1783
1784 unit = queue->raidPtr->raidid;
1785
1786 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1787
1788 if (unit >= numraid) {
1789 printf("Invalid unit number: %d %d\n", unit, numraid);
1790 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1791 }
1792 rs = &raid_softc[unit];
1793
1794 bp = req->bp;
1795 #if 1
1796 /* XXX when there is a physical disk failure, someone is passing us a
1797 * buffer that contains old stuff!! Attempt to deal with this problem
1798 * without taking a performance hit... (not sure where the real bug
1799 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1800
1801 if (bp->b_flags & B_ERROR) {
1802 bp->b_flags &= ~B_ERROR;
1803 }
1804 if (bp->b_error != 0) {
1805 bp->b_error = 0;
1806 }
1807 #endif
1808 raidbp = RAIDGETBUF(rs);
1809
1810 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1811
1812 /*
1813 * context for raidiodone
1814 */
1815 raidbp->rf_obp = bp;
1816 raidbp->req = req;
1817
1818 LIST_INIT(&raidbp->rf_buf.b_dep);
1819
1820 switch (req->type) {
1821 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1822 /* XXX need to do something extra here.. */
1823 /* I'm leaving this in, as I've never actually seen it used,
1824 * and I'd like folks to report it... GO */
1825 printf(("WAKEUP CALLED\n"));
1826 queue->numOutstanding++;
1827
1828 /* XXX need to glue the original buffer into this?? */
1829
1830 KernelWakeupFunc(&raidbp->rf_buf);
1831 break;
1832
1833 case RF_IO_TYPE_READ:
1834 case RF_IO_TYPE_WRITE:
1835
1836 if (req->tracerec) {
1837 RF_ETIMER_START(req->tracerec->timer);
1838 }
1839 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1840 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1841 req->sectorOffset, req->numSector,
1842 req->buf, KernelWakeupFunc, (void *) req,
1843 queue->raidPtr->logBytesPerSector, req->b_proc);
1844
1845 if (rf_debugKernelAccess) {
1846 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1847 (long) bp->b_blkno));
1848 }
1849 queue->numOutstanding++;
1850 queue->last_deq_sector = req->sectorOffset;
1851 /* acc wouldn't have been let in if there were any pending
1852 * reqs at any other priority */
1853 queue->curPriority = req->priority;
1854
1855 db1_printf(("Going for %c to unit %d row %d col %d\n",
1856 req->type, unit, queue->row, queue->col));
1857 db1_printf(("sector %d count %d (%d bytes) %d\n",
1858 (int) req->sectorOffset, (int) req->numSector,
1859 (int) (req->numSector <<
1860 queue->raidPtr->logBytesPerSector),
1861 (int) queue->raidPtr->logBytesPerSector));
1862 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1863 raidbp->rf_buf.b_vp->v_numoutput++;
1864 }
1865 VOP_STRATEGY(&raidbp->rf_buf);
1866
1867 break;
1868
1869 default:
1870 panic("bad req->type in rf_DispatchKernelIO");
1871 }
1872 db1_printf(("Exiting from DispatchKernelIO\n"));
1873 /* splx(s); */ /* want to test this */
1874 return (0);
1875 }
1876 /* this is the callback function associated with a I/O invoked from
1877 kernel code.
1878 */
1879 static void
1880 KernelWakeupFunc(vbp)
1881 struct buf *vbp;
1882 {
1883 RF_DiskQueueData_t *req = NULL;
1884 RF_DiskQueue_t *queue;
1885 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1886 struct buf *bp;
1887 struct raid_softc *rs;
1888 int unit;
1889 int s;
1890
1891 s = splbio();
1892 db1_printf(("recovering the request queue:\n"));
1893 req = raidbp->req;
1894
1895 bp = raidbp->rf_obp;
1896
1897 queue = (RF_DiskQueue_t *) req->queue;
1898
1899 if (raidbp->rf_buf.b_flags & B_ERROR) {
1900 bp->b_flags |= B_ERROR;
1901 bp->b_error = raidbp->rf_buf.b_error ?
1902 raidbp->rf_buf.b_error : EIO;
1903 }
1904
1905 /* XXX methinks this could be wrong... */
1906 #if 1
1907 bp->b_resid = raidbp->rf_buf.b_resid;
1908 #endif
1909
1910 if (req->tracerec) {
1911 RF_ETIMER_STOP(req->tracerec->timer);
1912 RF_ETIMER_EVAL(req->tracerec->timer);
1913 RF_LOCK_MUTEX(rf_tracing_mutex);
1914 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1915 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1916 req->tracerec->num_phys_ios++;
1917 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1918 }
1919 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1920
1921 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1922
1923
1924 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1925 * ballistic, and mark the component as hosed... */
1926
1927 if (bp->b_flags & B_ERROR) {
1928 /* Mark the disk as dead */
1929 /* but only mark it once... */
1930 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1931 rf_ds_optimal) {
1932 printf("raid%d: IO Error. Marking %s as failed.\n",
1933 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1934 queue->raidPtr->Disks[queue->row][queue->col].status =
1935 rf_ds_failed;
1936 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1937 queue->raidPtr->numFailures++;
1938 queue->raidPtr->numNewFailures++;
1939 } else { /* Disk is already dead... */
1940 /* printf("Disk already marked as dead!\n"); */
1941 }
1942
1943 }
1944
1945 rs = &raid_softc[unit];
1946 RAIDPUTBUF(rs, raidbp);
1947
1948 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1949 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1950
1951 splx(s);
1952 }
1953
1954
1955
1956 /*
1957 * initialize a buf structure for doing an I/O in the kernel.
1958 */
1959 static void
1960 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1961 logBytesPerSector, b_proc)
1962 struct buf *bp;
1963 struct vnode *b_vp;
1964 unsigned rw_flag;
1965 dev_t dev;
1966 RF_SectorNum_t startSect;
1967 RF_SectorCount_t numSect;
1968 caddr_t buf;
1969 void (*cbFunc) (struct buf *);
1970 void *cbArg;
1971 int logBytesPerSector;
1972 struct proc *b_proc;
1973 {
1974 /* bp->b_flags = B_PHYS | rw_flag; */
1975 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1976 bp->b_bcount = numSect << logBytesPerSector;
1977 bp->b_bufsize = bp->b_bcount;
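	/*
	 * Note: logBytesPerSector is log2 of the sector size, so with the
	 * usual 512-byte sectors it is 9 and, e.g., a 16-sector request
	 * becomes 16 << 9 == 8192 bytes.  (The 512-byte figure is only an
	 * illustration; the real value comes from the RAID configuration.)
	 */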
1978 bp->b_error = 0;
1979 bp->b_dev = dev;
1980 bp->b_data = buf;
1981 bp->b_blkno = startSect;
1982 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1983 if (bp->b_bcount == 0) {
1984 panic("bp->b_bcount is zero in InitBP!!\n");
1985 }
1986 bp->b_proc = b_proc;
1987 bp->b_iodone = cbFunc;
1988 bp->b_vp = b_vp;
1989
1990 }
1991
1992 static void
1993 raidgetdefaultlabel(raidPtr, rs, lp)
1994 RF_Raid_t *raidPtr;
1995 struct raid_softc *rs;
1996 struct disklabel *lp;
1997 {
1998 db1_printf(("Building a default label...\n"));
1999 memset(lp, 0, sizeof(*lp));
2000
2001 /* fabricate a label... */
2002 lp->d_secperunit = raidPtr->totalSectors;
2003 lp->d_secsize = raidPtr->bytesPerSector;
2004 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2005 lp->d_ntracks = 4 * raidPtr->numCol;
2006 lp->d_ncylinders = raidPtr->totalSectors /
2007 (lp->d_nsectors * lp->d_ntracks);
2008 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
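	/*
	 * Worked example (purely hypothetical numbers): a 5-column set with
	 * totalSectors == 1000000 and 32 data sectors per stripe gets
	 * d_nsectors = 32, d_ntracks = 20, and d_ncylinders =
	 * 1000000 / (32 * 20) == 1562.  The geometry is fictitious; it only
	 * has to multiply out to roughly the size of the raid device.
	 */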
2009
2010 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2011 lp->d_type = DTYPE_RAID;
2012 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2013 lp->d_rpm = 3600;
2014 lp->d_interleave = 1;
2015 lp->d_flags = 0;
2016
2017 lp->d_partitions[RAW_PART].p_offset = 0;
2018 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2019 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2020 lp->d_npartitions = RAW_PART + 1;
2021
2022 lp->d_magic = DISKMAGIC;
2023 lp->d_magic2 = DISKMAGIC;
2024 	lp->d_checksum = dkcksum(lp);
2025
2026 }
2027 /*
2028 * Read the disklabel from the raid device. If one is not present, fake one
2029 * up.
2030 */
2031 static void
2032 raidgetdisklabel(dev)
2033 dev_t dev;
2034 {
2035 int unit = raidunit(dev);
2036 struct raid_softc *rs = &raid_softc[unit];
2037 char *errstring;
2038 struct disklabel *lp = rs->sc_dkdev.dk_label;
2039 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2040 RF_Raid_t *raidPtr;
2041
2042 db1_printf(("Getting the disklabel...\n"));
2043
2044 memset(clp, 0, sizeof(*clp));
2045
2046 raidPtr = raidPtrs[unit];
2047
2048 raidgetdefaultlabel(raidPtr, rs, lp);
2049
2050 /*
2051 * Call the generic disklabel extraction routine.
2052 */
2053 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2054 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
2055 if (errstring)
2056 raidmakedisklabel(rs);
2057 else {
2058 int i;
2059 struct partition *pp;
2060
2061 /*
2062 * Sanity check whether the found disklabel is valid.
2063 *
2064 		 * This is necessary since the total size of the raid device
2065 		 * may vary when the interleave is changed even though exactly
2066 		 * the same components are used, and an old disklabel may be
2067 		 * used if one is found.
2068 */
2069 if (lp->d_secperunit != rs->sc_size)
2070 printf("raid%d: WARNING: %s: "
2071 "total sector size in disklabel (%d) != "
2072 "the size of raid (%ld)\n", unit, rs->sc_xname,
2073 lp->d_secperunit, (long) rs->sc_size);
2074 for (i = 0; i < lp->d_npartitions; i++) {
2075 pp = &lp->d_partitions[i];
2076 if (pp->p_offset + pp->p_size > rs->sc_size)
2077 printf("raid%d: WARNING: %s: end of partition `%c' "
2078 "exceeds the size of raid (%ld)\n",
2079 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
2080 }
2081 }
2082
2083 }
2084 /*
2085 * Take care of things one might want to take care of in the event
2086 * that a disklabel isn't present.
2087 */
2088 static void
2089 raidmakedisklabel(rs)
2090 struct raid_softc *rs;
2091 {
2092 struct disklabel *lp = rs->sc_dkdev.dk_label;
2093 db1_printf(("Making a label..\n"));
2094
2095 /*
2096 * For historical reasons, if there's no disklabel present
2097 * the raw partition must be marked FS_BSDFFS.
2098 */
2099
2100 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2101
2102 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2103
2104 lp->d_checksum = dkcksum(lp);
2105 }
2106 /*
2107 * Lookup the provided name in the filesystem. If the file exists,
2108 * is a valid block device, and isn't being used by anyone else,
2109 * set *vpp to the file's vnode.
2110 * You'll find the original of this in ccd.c
2111 */
2112 int
2113 raidlookup(path, p, vpp)
2114 char *path;
2115 struct proc *p;
2116 struct vnode **vpp; /* result */
2117 {
2118 struct nameidata nd;
2119 struct vnode *vp;
2120 struct vattr va;
2121 int error;
2122
2123 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2124 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2125 #if 0
2126 printf("RAIDframe: vn_open returned %d\n", error);
2127 #endif
2128 return (error);
2129 }
2130 vp = nd.ni_vp;
2131 if (vp->v_usecount > 1) {
2132 VOP_UNLOCK(vp, 0);
2133 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2134 return (EBUSY);
2135 }
2136 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2137 VOP_UNLOCK(vp, 0);
2138 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2139 return (error);
2140 }
2141 /* XXX: eventually we should handle VREG, too. */
2142 if (va.va_type != VBLK) {
2143 VOP_UNLOCK(vp, 0);
2144 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2145 return (ENOTBLK);
2146 }
2147 VOP_UNLOCK(vp, 0);
2148 *vpp = vp;
2149 return (0);
2150 }
2151 /*
2152 * Wait interruptibly for an exclusive lock.
2153 *
2154 * XXX
2155 * Several drivers do this; it should be abstracted and made MP-safe.
2156 * (Hmm... where have we seen this warning before :-> GO )
2157 */
2158 static int
2159 raidlock(rs)
2160 struct raid_softc *rs;
2161 {
2162 int error;
2163
2164 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2165 rs->sc_flags |= RAIDF_WANTED;
2166 if ((error =
2167 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2168 return (error);
2169 }
2170 rs->sc_flags |= RAIDF_LOCKED;
2171 return (0);
2172 }
2173 /*
2174 * Unlock and wake up any waiters.
2175 */
2176 static void
2177 raidunlock(rs)
2178 struct raid_softc *rs;
2179 {
2180
2181 rs->sc_flags &= ~RAIDF_LOCKED;
2182 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2183 rs->sc_flags &= ~RAIDF_WANTED;
2184 wakeup(rs);
2185 }
2186 }
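/*
 * A sketch of how the two routines above are meant to be paired (this
 * mirrors the open/close/ioctl paths elsewhere in this file):
 *
 *	if ((error = raidlock(rs)) != 0)
 *		return (error);
 *	... fiddle with the softc, disklabel, etc. ...
 *	raidunlock(rs);
 */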
2187
2188
2189 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2190 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
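/*
 * With the usual DEV_BSIZE of 512 the component label therefore lives at
 * block 16384 / 512 == 32 of each component, inside the region RAIDframe
 * reserves at the front of every component (see the rf_protected_sectors
 * note in raidread_component_label() below).
 */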
2191
2192 int
2193 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2194 {
2195 RF_ComponentLabel_t clabel;
2196 raidread_component_label(dev, b_vp, &clabel);
2197 clabel.mod_counter = mod_counter;
2198 clabel.clean = RF_RAID_CLEAN;
2199 raidwrite_component_label(dev, b_vp, &clabel);
2200 return(0);
2201 }
2202
2203
2204 int
2205 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2206 {
2207 RF_ComponentLabel_t clabel;
2208 raidread_component_label(dev, b_vp, &clabel);
2209 clabel.mod_counter = mod_counter;
2210 clabel.clean = RF_RAID_DIRTY;
2211 raidwrite_component_label(dev, b_vp, &clabel);
2212 return(0);
2213 }
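/*
 * Both helpers above are simple read-modify-write operations on the
 * component label: read it, stamp in the new mod_counter, flip the clean
 * flag, and write it back.  rf_markalldirty() below uses raidmarkdirty()
 * on every live component when an array is brought up, and
 * rf_update_component_labels() uses raidmarkclean() once parity is known
 * to be good.
 */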
2214
2215 /* ARGSUSED */
2216 int
2217 raidread_component_label(dev, b_vp, clabel)
2218 dev_t dev;
2219 struct vnode *b_vp;
2220 RF_ComponentLabel_t *clabel;
2221 {
2222 struct buf *bp;
2223 const struct bdevsw *bdev;
2224 int error;
2225
2226 /* XXX should probably ensure that we don't try to do this if
2227 someone has changed rf_protected_sectors. */
2228
2229 if (b_vp == NULL) {
2230 /* For whatever reason, this component is not valid.
2231 Don't try to read a component label from it. */
2232 return(EINVAL);
2233 }
2234
2235 /* get a block of the appropriate size... */
2236 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2237 bp->b_dev = dev;
2238
2239 /* get our ducks in a row for the read */
2240 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2241 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2242 bp->b_flags |= B_READ;
2243 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2244
2245 bdev = bdevsw_lookup(bp->b_dev);
2246 	if (bdev == NULL) {
2247 		brelse(bp);	/* don't leak the buffer from geteblk() */
		return (ENXIO);
	}
2248 (*bdev->d_strategy)(bp);
2249
2250 error = biowait(bp);
2251
2252 if (!error) {
2253 memcpy(clabel, bp->b_data,
2254 sizeof(RF_ComponentLabel_t));
2255 #if 0
2256 rf_print_component_label( clabel );
2257 #endif
2258 } else {
2259 #if 0
2260 printf("Failed to read RAID component label!\n");
2261 #endif
2262 }
2263
2264 brelse(bp);
2265 return(error);
2266 }
2267 /* ARGSUSED */
2268 int
2269 raidwrite_component_label(dev, b_vp, clabel)
2270 dev_t dev;
2271 struct vnode *b_vp;
2272 RF_ComponentLabel_t *clabel;
2273 {
2274 struct buf *bp;
2275 const struct bdevsw *bdev;
2276 int error;
2277
2278 /* get a block of the appropriate size... */
2279 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2280 bp->b_dev = dev;
2281
2282 /* get our ducks in a row for the write */
2283 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2284 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2285 bp->b_flags |= B_WRITE;
2286 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2287
2288 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2289
2290 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2291
2292 bdev = bdevsw_lookup(bp->b_dev);
2293 	if (bdev == NULL) {
2294 		brelse(bp);	/* don't leak the buffer from geteblk() */
		return (ENXIO);
	}
2295 (*bdev->d_strategy)(bp);
2296 error = biowait(bp);
2297 brelse(bp);
2298 if (error) {
2299 #if 1
2300 printf("Failed to write RAID component info!\n");
2301 #endif
2302 }
2303
2304 return(error);
2305 }
2306
2307 void
2308 rf_markalldirty(raidPtr)
2309 RF_Raid_t *raidPtr;
2310 {
2311 RF_ComponentLabel_t clabel;
2312 int r,c;
2313
2314 raidPtr->mod_counter++;
2315 for (r = 0; r < raidPtr->numRow; r++) {
2316 for (c = 0; c < raidPtr->numCol; c++) {
2317 /* we don't want to touch (at all) a disk that has
2318 failed */
2319 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2320 raidread_component_label(
2321 raidPtr->Disks[r][c].dev,
2322 raidPtr->raid_cinfo[r][c].ci_vp,
2323 &clabel);
2324 if (clabel.status == rf_ds_spared) {
2325 /* XXX do something special...
2326 but whatever you do, don't
2327 try to access it!! */
2328 } else {
2329 #if 0
2330 clabel.status =
2331 raidPtr->Disks[r][c].status;
2332 raidwrite_component_label(
2333 raidPtr->Disks[r][c].dev,
2334 raidPtr->raid_cinfo[r][c].ci_vp,
2335 &clabel);
2336 #endif
2337 raidmarkdirty(
2338 raidPtr->Disks[r][c].dev,
2339 raidPtr->raid_cinfo[r][c].ci_vp,
2340 raidPtr->mod_counter);
2341 }
2342 }
2343 }
2344 }
2345 /* printf("Component labels marked dirty.\n"); */
2346 #if 0
2347 for( c = 0; c < raidPtr->numSpare ; c++) {
2348 sparecol = raidPtr->numCol + c;
2349 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2350 /*
2351
2352 XXX this is where we get fancy and map this spare
2353 		   into its correct spot in the array.
2354
2355 */
2356 /*
2357
2358 we claim this disk is "optimal" if it's
2359 rf_ds_used_spare, as that means it should be
2360 directly substitutable for the disk it replaced.
2361 We note that too...
2362
2363 */
2364
2365 for(i=0;i<raidPtr->numRow;i++) {
2366 for(j=0;j<raidPtr->numCol;j++) {
2367 if ((raidPtr->Disks[i][j].spareRow ==
2368 r) &&
2369 (raidPtr->Disks[i][j].spareCol ==
2370 sparecol)) {
2371 srow = r;
2372 scol = sparecol;
2373 break;
2374 }
2375 }
2376 }
2377
2378 raidread_component_label(
2379 raidPtr->Disks[r][sparecol].dev,
2380 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2381 &clabel);
2382 /* make sure status is noted */
2383 clabel.version = RF_COMPONENT_LABEL_VERSION;
2384 clabel.mod_counter = raidPtr->mod_counter;
2385 clabel.serial_number = raidPtr->serial_number;
2386 clabel.row = srow;
2387 clabel.column = scol;
2388 clabel.num_rows = raidPtr->numRow;
2389 clabel.num_columns = raidPtr->numCol;
2390 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2391 clabel.status = rf_ds_optimal;
2392 raidwrite_component_label(
2393 raidPtr->Disks[r][sparecol].dev,
2394 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2395 &clabel);
2396 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2397 				raidPtr->raid_cinfo[r][sparecol].ci_vp,
				raidPtr->mod_counter);
2398 }
2399 }
2400
2401 #endif
2402 }
2403
2404
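/*
 * Walk all the live components (and any used spares), bump the mod
 * counter, and rewrite their component labels.  On a final update
 * (RF_FINAL_COMPONENT_UPDATE) with good parity the labels are also
 * marked clean via raidmarkclean().
 */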
2405 void
2406 rf_update_component_labels(raidPtr, final)
2407 RF_Raid_t *raidPtr;
2408 int final;
2409 {
2410 RF_ComponentLabel_t clabel;
2411 int sparecol;
2412 int r,c;
2413 int i,j;
2414 int srow, scol;
2415
2416 srow = -1;
2417 scol = -1;
2418
2419 /* XXX should do extra checks to make sure things really are clean,
2420 rather than blindly setting the clean bit... */
2421
2422 raidPtr->mod_counter++;
2423
2424 for (r = 0; r < raidPtr->numRow; r++) {
2425 for (c = 0; c < raidPtr->numCol; c++) {
2426 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2427 raidread_component_label(
2428 raidPtr->Disks[r][c].dev,
2429 raidPtr->raid_cinfo[r][c].ci_vp,
2430 &clabel);
2431 /* make sure status is noted */
2432 clabel.status = rf_ds_optimal;
2433 /* bump the counter */
2434 clabel.mod_counter = raidPtr->mod_counter;
2435
2436 raidwrite_component_label(
2437 raidPtr->Disks[r][c].dev,
2438 raidPtr->raid_cinfo[r][c].ci_vp,
2439 &clabel);
2440 if (final == RF_FINAL_COMPONENT_UPDATE) {
2441 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2442 raidmarkclean(
2443 raidPtr->Disks[r][c].dev,
2444 raidPtr->raid_cinfo[r][c].ci_vp,
2445 raidPtr->mod_counter);
2446 }
2447 }
2448 }
2449 /* else we don't touch it.. */
2450 }
2451 }
2452
2453 for( c = 0; c < raidPtr->numSpare ; c++) {
2454 sparecol = raidPtr->numCol + c;
2455 /* Need to ensure that the reconstruct actually completed! */
2456 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2457 /*
2458
2459 we claim this disk is "optimal" if it's
2460 rf_ds_used_spare, as that means it should be
2461 directly substitutable for the disk it replaced.
2462 We note that too...
2463
2464 */
2465
2466 for(i=0;i<raidPtr->numRow;i++) {
2467 for(j=0;j<raidPtr->numCol;j++) {
2468 if ((raidPtr->Disks[i][j].spareRow ==
2469 0) &&
2470 (raidPtr->Disks[i][j].spareCol ==
2471 sparecol)) {
2472 srow = i;
2473 scol = j;
2474 break;
2475 }
2476 }
2477 }
2478
2479 /* XXX shouldn't *really* need this... */
2480 raidread_component_label(
2481 raidPtr->Disks[0][sparecol].dev,
2482 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2483 &clabel);
2484 /* make sure status is noted */
2485
2486 raid_init_component_label(raidPtr, &clabel);
2487
2488 clabel.mod_counter = raidPtr->mod_counter;
2489 clabel.row = srow;
2490 clabel.column = scol;
2491 clabel.status = rf_ds_optimal;
2492
2493 raidwrite_component_label(
2494 raidPtr->Disks[0][sparecol].dev,
2495 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2496 &clabel);
2497 if (final == RF_FINAL_COMPONENT_UPDATE) {
2498 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2499 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2500 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2501 raidPtr->mod_counter);
2502 }
2503 }
2504 }
2505 }
2506 /* printf("Component labels updated\n"); */
2507 }
2508
2509 void
2510 rf_close_component(raidPtr, vp, auto_configured)
2511 RF_Raid_t *raidPtr;
2512 struct vnode *vp;
2513 int auto_configured;
2514 {
2515 struct proc *p;
2516
2517 p = raidPtr->engine_thread;
2518
2519 if (vp != NULL) {
2520 if (auto_configured == 1) {
2521 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2522 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2523 vput(vp);
2524
2525 } else {
2526 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2527 }
2528 } else {
2529 #if 0
2530 printf("vnode was NULL\n");
2531 #endif
2532 }
2533 }
2534
2535
2536 void
2537 rf_UnconfigureVnodes(raidPtr)
2538 RF_Raid_t *raidPtr;
2539 {
2540 int r,c;
2541 struct proc *p;
2542 struct vnode *vp;
2543 int acd;
2544
2545
2546 /* We take this opportunity to close the vnodes like we should.. */
2547
2548 p = raidPtr->engine_thread;
2549
2550 for (r = 0; r < raidPtr->numRow; r++) {
2551 for (c = 0; c < raidPtr->numCol; c++) {
2552 #if 0
2553 printf("raid%d: Closing vnode for row: %d col: %d\n",
2554 raidPtr->raidid, r, c);
2555 #endif
2556 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2557 acd = raidPtr->Disks[r][c].auto_configured;
2558 rf_close_component(raidPtr, vp, acd);
2559 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2560 raidPtr->Disks[r][c].auto_configured = 0;
2561 }
2562 }
2563 for (r = 0; r < raidPtr->numSpare; r++) {
2564 #if 0
2565 printf("raid%d: Closing vnode for spare: %d\n",
2566 raidPtr->raidid, r);
2567 #endif
2568 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2569 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2570 rf_close_component(raidPtr, vp, acd);
2571 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2572 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2573 }
2574 }
2575
2576
2577 void
2578 rf_ReconThread(req)
2579 struct rf_recon_req *req;
2580 {
2581 int s;
2582 RF_Raid_t *raidPtr;
2583
2584 s = splbio();
2585 raidPtr = (RF_Raid_t *) req->raidPtr;
2586 raidPtr->recon_in_progress = 1;
2587
2588 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2589 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2590
2591 /* XXX get rid of this! we don't need it at all.. */
2592 RF_Free(req, sizeof(*req));
2593
2594 raidPtr->recon_in_progress = 0;
2595 splx(s);
2596
2597 /* That's all... */
2598 kthread_exit(0); /* does not return */
2599 }
2600
2601 void
2602 rf_RewriteParityThread(raidPtr)
2603 RF_Raid_t *raidPtr;
2604 {
2605 int retcode;
2606 int s;
2607
2608 raidPtr->parity_rewrite_in_progress = 1;
2609 s = splbio();
2610 retcode = rf_RewriteParity(raidPtr);
2611 splx(s);
2612 if (retcode) {
2613 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2614 } else {
2615 /* set the clean bit! If we shutdown correctly,
2616 the clean bit on each component label will get
2617 set */
2618 raidPtr->parity_good = RF_RAID_CLEAN;
2619 }
2620 raidPtr->parity_rewrite_in_progress = 0;
2621
2622 /* Anyone waiting for us to stop? If so, inform them... */
2623 if (raidPtr->waitShutdown) {
2624 wakeup(&raidPtr->parity_rewrite_in_progress);
2625 }
2626
2627 /* That's all... */
2628 kthread_exit(0); /* does not return */
2629 }
2630
2631
2632 void
2633 rf_CopybackThread(raidPtr)
2634 RF_Raid_t *raidPtr;
2635 {
2636 int s;
2637
2638 raidPtr->copyback_in_progress = 1;
2639 s = splbio();
2640 rf_CopybackReconstructedData(raidPtr);
2641 splx(s);
2642 raidPtr->copyback_in_progress = 0;
2643
2644 /* That's all... */
2645 kthread_exit(0); /* does not return */
2646 }
2647
2648
2649 void
2650 rf_ReconstructInPlaceThread(req)
2651 struct rf_recon_req *req;
2652 {
2653 int retcode;
2654 int s;
2655 RF_Raid_t *raidPtr;
2656
2657 s = splbio();
2658 raidPtr = req->raidPtr;
2659 raidPtr->recon_in_progress = 1;
2660 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2661 RF_Free(req, sizeof(*req));
2662 raidPtr->recon_in_progress = 0;
2663 splx(s);
2664
2665 /* That's all... */
2666 kthread_exit(0); /* does not return */
2667 }
2668
2669 void
2670 rf_mountroot_hook(dev)
2671 struct device *dev;
2672 {
2673
2674 }
2675
2676
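/*
 * Troll through every disk-class device in the system, open its raw
 * partition long enough to read the disklabel, and then, for each
 * partition of type FS_RAID, try to read a RAIDframe component label.
 * Every component with a plausible label is added to the RF_AutoConfig_t
 * list that is returned to the caller.
 */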
2677 RF_AutoConfig_t *
2678 rf_find_raid_components()
2679 {
2680 struct vnode *vp;
2681 struct disklabel label;
2682 struct device *dv;
2683 dev_t dev;
2684 int bmajor;
2685 int error;
2686 int i;
2687 int good_one;
2688 RF_ComponentLabel_t *clabel;
2689 RF_AutoConfig_t *ac_list;
2690 RF_AutoConfig_t *ac;
2691
2692
2693 /* initialize the AutoConfig list */
2694 ac_list = NULL;
2695
2696 /* we begin by trolling through *all* the devices on the system */
2697
2698 for (dv = alldevs.tqh_first; dv != NULL;
2699 dv = dv->dv_list.tqe_next) {
2700
2701 /* we are only interested in disks... */
2702 if (dv->dv_class != DV_DISK)
2703 continue;
2704
2705 /* we don't care about floppies... */
2706 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2707 continue;
2708 }
2709
2710 /* we don't care about CD's... */
2711 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"cd")) {
2712 continue;
2713 }
2714
2715 /* hdfd is the Atari/Hades floppy driver */
2716 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
2717 continue;
2718 }
2719 /* fdisa is the Atari/Milan floppy driver */
2720 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fdisa")) {
2721 continue;
2722 }
2723
2724 /* need to find the device_name_to_block_device_major stuff */
2725 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
2726
2727 /* get a vnode for the raw partition of this disk */
2728
2729 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
2730 if (bdevvp(dev, &vp))
2731 panic("RAID can't alloc vnode");
2732
2733 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2734
2735 if (error) {
2736 /* "Who cares." Continue looking
2737 for something that exists*/
2738 vput(vp);
2739 continue;
2740 }
2741
2742 /* Ok, the disk exists. Go get the disklabel. */
2743 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2744 FREAD, NOCRED, 0);
2745 if (error) {
2746 /*
2747 * XXX can't happen - open() would
2748 * have errored out (or faked up one)
2749 */
2750 printf("can't get label for dev %s%c (%d)!?!?\n",
2751 dv->dv_xname, 'a' + RAW_PART, error);
2752 }
2753
2754 /* don't need this any more. We'll allocate it again
2755 a little later if we really do... */
2756 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2757 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2758 vput(vp);
2759
2760 for (i=0; i < label.d_npartitions; i++) {
2761 /* We only support partitions marked as RAID */
2762 if (label.d_partitions[i].p_fstype != FS_RAID)
2763 continue;
2764
2765 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
2766 if (bdevvp(dev, &vp))
2767 panic("RAID can't alloc vnode");
2768
2769 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2770 if (error) {
2771 /* Whatever... */
2772 vput(vp);
2773 continue;
2774 }
2775
2776 good_one = 0;
2777
2778 clabel = (RF_ComponentLabel_t *)
2779 malloc(sizeof(RF_ComponentLabel_t),
2780 M_RAIDFRAME, M_NOWAIT);
2781 if (clabel == NULL) {
2782 /* XXX CLEANUP HERE */
2783 printf("RAID auto config: out of memory!\n");
2784 return(NULL); /* XXX probably should panic? */
2785 }
2786
2787 if (!raidread_component_label(dev, vp, clabel)) {
2788 /* Got the label. Does it look reasonable? */
2789 if (rf_reasonable_label(clabel) &&
2790 (clabel->partitionSize <=
2791 label.d_partitions[i].p_size)) {
2792 #if DEBUG
2793 printf("Component on: %s%c: %d\n",
2794 dv->dv_xname, 'a'+i,
2795 label.d_partitions[i].p_size);
2796 rf_print_component_label(clabel);
2797 #endif
2798 /* if it's reasonable, add it,
2799 else ignore it. */
2800 ac = (RF_AutoConfig_t *)
2801 malloc(sizeof(RF_AutoConfig_t),
2802 M_RAIDFRAME,
2803 M_NOWAIT);
2804 if (ac == NULL) {
2805 /* XXX should panic?? */
2806 return(NULL);
2807 }
2808
2809 sprintf(ac->devname, "%s%c",
2810 dv->dv_xname, 'a'+i);
2811 ac->dev = dev;
2812 ac->vp = vp;
2813 ac->clabel = clabel;
2814 ac->next = ac_list;
2815 ac_list = ac;
2816 good_one = 1;
2817 }
2818 }
2819 if (!good_one) {
2820 /* cleanup */
2821 free(clabel, M_RAIDFRAME);
2822 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2823 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2824 vput(vp);
2825 }
2826 }
2827 }
2828 return(ac_list);
2829 }
2830
2831 static int
2832 rf_reasonable_label(clabel)
2833 RF_ComponentLabel_t *clabel;
2834 {
2835
2836 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2837 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2838 ((clabel->clean == RF_RAID_CLEAN) ||
2839 (clabel->clean == RF_RAID_DIRTY)) &&
2840 clabel->row >=0 &&
2841 clabel->column >= 0 &&
2842 clabel->num_rows > 0 &&
2843 clabel->num_columns > 0 &&
2844 clabel->row < clabel->num_rows &&
2845 clabel->column < clabel->num_columns &&
2846 clabel->blockSize > 0 &&
2847 clabel->numBlocks > 0) {
2848 /* label looks reasonable enough... */
2849 return(1);
2850 }
2851 return(0);
2852 }
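/*
 * For instance (hypothetical values), a label claiming the current
 * RF_COMPONENT_LABEL_VERSION, a clean flag of RF_RAID_CLEAN or
 * RF_RAID_DIRTY, row 0 of 1, column 2 of 5, a 512-byte blockSize, and a
 * non-zero numBlocks passes the test above; anything with a zero geometry
 * field or an out-of-range row/column is rejected.
 */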
2853
2854
2855 void
2856 rf_print_component_label(clabel)
2857 RF_ComponentLabel_t *clabel;
2858 {
2859 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2860 clabel->row, clabel->column,
2861 clabel->num_rows, clabel->num_columns);
2862 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2863 clabel->version, clabel->serial_number,
2864 clabel->mod_counter);
2865 printf(" Clean: %s Status: %d\n",
2866 clabel->clean ? "Yes" : "No", clabel->status );
2867 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2868 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2869 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2870 (char) clabel->parityConfig, clabel->blockSize,
2871 clabel->numBlocks);
2872 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2873 printf(" Contains root partition: %s\n",
2874 clabel->root_partition ? "Yes" : "No" );
2875 printf(" Last configured as: raid%d\n", clabel->last_unit );
2876 #if 0
2877 printf(" Config order: %d\n", clabel->config_order);
2878 #endif
2879
2880 }
2881
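/*
 * Partition the flat list of auto-configured components into config sets:
 * each component is appended to the first existing set whose first member
 * it matches (see rf_does_it_fit() below), and a new set is started when
 * no existing set will take it.
 */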
2882 RF_ConfigSet_t *
2883 rf_create_auto_sets(ac_list)
2884 RF_AutoConfig_t *ac_list;
2885 {
2886 RF_AutoConfig_t *ac;
2887 RF_ConfigSet_t *config_sets;
2888 RF_ConfigSet_t *cset;
2889 RF_AutoConfig_t *ac_next;
2890
2891
2892 config_sets = NULL;
2893
2894 /* Go through the AutoConfig list, and figure out which components
2895 belong to what sets. */
2896 ac = ac_list;
2897 while(ac!=NULL) {
2898 /* we're going to putz with ac->next, so save it here
2899 for use at the end of the loop */
2900 ac_next = ac->next;
2901
2902 if (config_sets == NULL) {
2903 /* will need at least this one... */
2904 config_sets = (RF_ConfigSet_t *)
2905 malloc(sizeof(RF_ConfigSet_t),
2906 M_RAIDFRAME, M_NOWAIT);
2907 if (config_sets == NULL) {
2908 panic("rf_create_auto_sets: No memory!\n");
2909 }
2910 /* this one is easy :) */
2911 config_sets->ac = ac;
2912 config_sets->next = NULL;
2913 config_sets->rootable = 0;
2914 ac->next = NULL;
2915 } else {
2916 /* which set does this component fit into? */
2917 cset = config_sets;
2918 while(cset!=NULL) {
2919 if (rf_does_it_fit(cset, ac)) {
2920 /* looks like it matches... */
2921 ac->next = cset->ac;
2922 cset->ac = ac;
2923 break;
2924 }
2925 cset = cset->next;
2926 }
2927 if (cset==NULL) {
2928 /* didn't find a match above... new set..*/
2929 cset = (RF_ConfigSet_t *)
2930 malloc(sizeof(RF_ConfigSet_t),
2931 M_RAIDFRAME, M_NOWAIT);
2932 if (cset == NULL) {
2933 panic("rf_create_auto_sets: No memory!\n");
2934 }
2935 cset->ac = ac;
2936 ac->next = NULL;
2937 cset->next = config_sets;
2938 cset->rootable = 0;
2939 config_sets = cset;
2940 }
2941 }
2942 ac = ac_next;
2943 }
2944
2945
2946 return(config_sets);
2947 }
2948
2949 static int
2950 rf_does_it_fit(cset, ac)
2951 RF_ConfigSet_t *cset;
2952 RF_AutoConfig_t *ac;
2953 {
2954 RF_ComponentLabel_t *clabel1, *clabel2;
2955
2956 /* If this one matches the *first* one in the set, that's good
2957 enough, since the other members of the set would have been
2958 through here too... */
2959 /* note that we are not checking partitionSize here..
2960
2961 Note that we are also not checking the mod_counters here.
2962 	   If everything else matches except the mod_counter, that's
2963 good enough for this test. We will deal with the mod_counters
2964 a little later in the autoconfiguration process.
2965
2966 (clabel1->mod_counter == clabel2->mod_counter) &&
2967
2968 The reason we don't check for this is that failed disks
2969 will have lower modification counts. If those disks are
2970 not added to the set they used to belong to, then they will
2971 form their own set, which may result in 2 different sets,
2972 for example, competing to be configured at raid0, and
2973 perhaps competing to be the root filesystem set. If the
2974 wrong ones get configured, or both attempt to become /,
2975 	   weird behaviour and/or serious lossage will occur. Thus we
2976 need to bring them into the fold here, and kick them out at
2977 a later point.
2978
2979 */
2980
2981 clabel1 = cset->ac->clabel;
2982 clabel2 = ac->clabel;
2983 if ((clabel1->version == clabel2->version) &&
2984 (clabel1->serial_number == clabel2->serial_number) &&
2985 (clabel1->num_rows == clabel2->num_rows) &&
2986 (clabel1->num_columns == clabel2->num_columns) &&
2987 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2988 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2989 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2990 (clabel1->parityConfig == clabel2->parityConfig) &&
2991 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2992 (clabel1->blockSize == clabel2->blockSize) &&
2993 (clabel1->numBlocks == clabel2->numBlocks) &&
2994 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2995 (clabel1->root_partition == clabel2->root_partition) &&
2996 (clabel1->last_unit == clabel2->last_unit) &&
2997 (clabel1->config_order == clabel2->config_order)) {
2998 		/* if it gets here, it almost *has* to be a match */
2999 } else {
3000 /* it's not consistent with somebody in the set..
3001 punt */
3002 return(0);
3003 }
3004 /* all was fine.. it must fit... */
3005 return(1);
3006 }
3007
3008 int
3009 rf_have_enough_components(cset)
3010 RF_ConfigSet_t *cset;
3011 {
3012 RF_AutoConfig_t *ac;
3013 RF_AutoConfig_t *auto_config;
3014 RF_ComponentLabel_t *clabel;
3015 int r,c;
3016 int num_rows;
3017 int num_cols;
3018 int num_missing;
3019 int mod_counter;
3020 int mod_counter_found;
3021 int even_pair_failed;
3022 char parity_type;
3023
3024
3025 /* check to see that we have enough 'live' components
3026 of this set. If so, we can configure it if necessary */
3027
3028 num_rows = cset->ac->clabel->num_rows;
3029 num_cols = cset->ac->clabel->num_columns;
3030 parity_type = cset->ac->clabel->parityConfig;
3031
3032 /* XXX Check for duplicate components!?!?!? */
3033
3034 /* Determine what the mod_counter is supposed to be for this set. */
3035
3036 mod_counter_found = 0;
3037 mod_counter = 0;
3038 ac = cset->ac;
3039 while(ac!=NULL) {
3040 if (mod_counter_found==0) {
3041 mod_counter = ac->clabel->mod_counter;
3042 mod_counter_found = 1;
3043 } else {
3044 if (ac->clabel->mod_counter > mod_counter) {
3045 mod_counter = ac->clabel->mod_counter;
3046 }
3047 }
3048 ac = ac->next;
3049 }
3050
3051 num_missing = 0;
3052 auto_config = cset->ac;
3053
3054 for(r=0; r<num_rows; r++) {
3055 even_pair_failed = 0;
3056 for(c=0; c<num_cols; c++) {
3057 ac = auto_config;
3058 while(ac!=NULL) {
3059 if ((ac->clabel->row == r) &&
3060 (ac->clabel->column == c) &&
3061 (ac->clabel->mod_counter == mod_counter)) {
3062 /* it's this one... */
3063 #if DEBUG
3064 printf("Found: %s at %d,%d\n",
3065 ac->devname,r,c);
3066 #endif
3067 break;
3068 }
3069 ac=ac->next;
3070 }
3071 if (ac==NULL) {
3072 /* Didn't find one here! */
3073 /* special case for RAID 1, especially
3074 where there are more than 2
3075 components (where RAIDframe treats
3076 things a little differently :( ) */
3077 if (parity_type == '1') {
3078 if (c%2 == 0) { /* even component */
3079 even_pair_failed = 1;
3080 } else { /* odd component. If
3081 we're failed, and
3082 so is the even
3083 component, it's
3084 "Good Night, Charlie" */
3085 if (even_pair_failed == 1) {
3086 return(0);
3087 }
3088 }
3089 } else {
3090 /* normal accounting */
3091 num_missing++;
3092 }
3093 }
3094 if ((parity_type == '1') && (c%2 == 1)) {
3095 /* Just did an even component, and we didn't
3096 bail.. reset the even_pair_failed flag,
3097 and go on to the next component.... */
3098 even_pair_failed = 0;
3099 }
3100 }
3101 }
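	/*
	 * Example of the RAID 1 accounting above (hypothetical 4-component
	 * set): components 0/1 and 2/3 form mirror pairs.  Losing, say,
	 * components 0 and 2 is survivable since each pair still has a
	 * live member, but losing both 0 and 1 kills that pair and we
	 * return 0 in the loop above.
	 */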
3102
3103 clabel = cset->ac->clabel;
3104
3105 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3106 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3107 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3108 /* XXX this needs to be made *much* more general */
3109 /* Too many failures */
3110 return(0);
3111 }
3112 /* otherwise, all is well, and we've got enough to take a kick
3113 at autoconfiguring this set */
3114 return(1);
3115 }
3116
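/*
 * Build an RF_Config_t for this set from its component labels: the
 * geometry and layout parameters come from the first label, and the
 * device names are filled in per component.  The result is handed to
 * rf_Configure() just as a configuration coming from userland would be.
 */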
3117 void
3118 rf_create_configuration(ac,config,raidPtr)
3119 RF_AutoConfig_t *ac;
3120 RF_Config_t *config;
3121 RF_Raid_t *raidPtr;
3122 {
3123 RF_ComponentLabel_t *clabel;
3124 int i;
3125
3126 clabel = ac->clabel;
3127
3128 /* 1. Fill in the common stuff */
3129 config->numRow = clabel->num_rows;
3130 config->numCol = clabel->num_columns;
3131 config->numSpare = 0; /* XXX should this be set here? */
3132 config->sectPerSU = clabel->sectPerSU;
3133 config->SUsPerPU = clabel->SUsPerPU;
3134 config->SUsPerRU = clabel->SUsPerRU;
3135 config->parityConfig = clabel->parityConfig;
3136 /* XXX... */
3137 strcpy(config->diskQueueType,"fifo");
3138 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3139 config->layoutSpecificSize = 0; /* XXX ?? */
3140
3141 while(ac!=NULL) {
3142 /* row/col values will be in range due to the checks
3143 in reasonable_label() */
3144 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3145 ac->devname);
3146 ac = ac->next;
3147 }
3148
3149 for(i=0;i<RF_MAXDBGV;i++) {
3150 		config->debugVars[i][0] = '\0';
3151 }
3152 }
3153
3154 int
3155 rf_set_autoconfig(raidPtr, new_value)
3156 RF_Raid_t *raidPtr;
3157 int new_value;
3158 {
3159 RF_ComponentLabel_t clabel;
3160 struct vnode *vp;
3161 dev_t dev;
3162 int row, column;
3163
3164 raidPtr->autoconfigure = new_value;
3165 for(row=0; row<raidPtr->numRow; row++) {
3166 for(column=0; column<raidPtr->numCol; column++) {
3167 if (raidPtr->Disks[row][column].status ==
3168 rf_ds_optimal) {
3169 dev = raidPtr->Disks[row][column].dev;
3170 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3171 raidread_component_label(dev, vp, &clabel);
3172 clabel.autoconfigure = new_value;
3173 raidwrite_component_label(dev, vp, &clabel);
3174 }
3175 }
3176 }
3177 return(new_value);
3178 }
3179
3180 int
3181 rf_set_rootpartition(raidPtr, new_value)
3182 RF_Raid_t *raidPtr;
3183 int new_value;
3184 {
3185 RF_ComponentLabel_t clabel;
3186 struct vnode *vp;
3187 dev_t dev;
3188 int row, column;
3189
3190 raidPtr->root_partition = new_value;
3191 for(row=0; row<raidPtr->numRow; row++) {
3192 for(column=0; column<raidPtr->numCol; column++) {
3193 if (raidPtr->Disks[row][column].status ==
3194 rf_ds_optimal) {
3195 dev = raidPtr->Disks[row][column].dev;
3196 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3197 raidread_component_label(dev, vp, &clabel);
3198 clabel.root_partition = new_value;
3199 raidwrite_component_label(dev, vp, &clabel);
3200 }
3201 }
3202 }
3203 return(new_value);
3204 }
3205
3206 void
3207 rf_release_all_vps(cset)
3208 RF_ConfigSet_t *cset;
3209 {
3210 RF_AutoConfig_t *ac;
3211
3212 ac = cset->ac;
3213 while(ac!=NULL) {
3214 /* Close the vp, and give it back */
3215 if (ac->vp) {
3216 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3217 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3218 vput(ac->vp);
3219 ac->vp = NULL;
3220 }
3221 ac = ac->next;
3222 }
3223 }
3224
3225
3226 void
3227 rf_cleanup_config_set(cset)
3228 RF_ConfigSet_t *cset;
3229 {
3230 RF_AutoConfig_t *ac;
3231 RF_AutoConfig_t *next_ac;
3232
3233 ac = cset->ac;
3234 while(ac!=NULL) {
3235 next_ac = ac->next;
3236 /* nuke the label */
3237 free(ac->clabel, M_RAIDFRAME);
3238 /* cleanup the config structure */
3239 free(ac, M_RAIDFRAME);
3240 /* "next.." */
3241 ac = next_ac;
3242 }
3243 /* and, finally, nuke the config set */
3244 free(cset, M_RAIDFRAME);
3245 }
3246
3247
3248 void
3249 raid_init_component_label(raidPtr, clabel)
3250 RF_Raid_t *raidPtr;
3251 RF_ComponentLabel_t *clabel;
3252 {
3253 /* current version number */
3254 clabel->version = RF_COMPONENT_LABEL_VERSION;
3255 clabel->serial_number = raidPtr->serial_number;
3256 clabel->mod_counter = raidPtr->mod_counter;
3257 clabel->num_rows = raidPtr->numRow;
3258 clabel->num_columns = raidPtr->numCol;
3259 clabel->clean = RF_RAID_DIRTY; /* not clean */
3260 clabel->status = rf_ds_optimal; /* "It's good!" */
3261
3262 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3263 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3264 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3265
3266 clabel->blockSize = raidPtr->bytesPerSector;
3267 clabel->numBlocks = raidPtr->sectorsPerDisk;
3268
3269 /* XXX not portable */
3270 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3271 clabel->maxOutstanding = raidPtr->maxOutstanding;
3272 clabel->autoconfigure = raidPtr->autoconfigure;
3273 clabel->root_partition = raidPtr->root_partition;
3274 clabel->last_unit = raidPtr->raidid;
3275 clabel->config_order = raidPtr->config_order;
3276 }
3277
3278 int
3279 rf_auto_config_set(cset,unit)
3280 RF_ConfigSet_t *cset;
3281 int *unit;
3282 {
3283 RF_Raid_t *raidPtr;
3284 RF_Config_t *config;
3285 int raidID;
3286 int retcode;
3287
3288 #if DEBUG
3289 printf("RAID autoconfigure\n");
3290 #endif
3291
3292 retcode = 0;
3293 *unit = -1;
3294
3295 /* 1. Create a config structure */
3296
3297 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3298 M_RAIDFRAME,
3299 M_NOWAIT);
3300 if (config==NULL) {
3301 printf("Out of mem!?!?\n");
3302 /* XXX do something more intelligent here. */
3303 return(1);
3304 }
3305
3306 memset(config, 0, sizeof(RF_Config_t));
3307
3308 /* XXX raidID needs to be set correctly.. */
3309
3310 /*
3311 2. Figure out what RAID ID this one is supposed to live at
3312 See if we can get the same RAID dev that it was configured
3313 on last time..
3314 */
3315
3316 raidID = cset->ac->clabel->last_unit;
3317 if ((raidID < 0) || (raidID >= numraid)) {
3318 /* let's not wander off into lala land. */
3319 raidID = numraid - 1;
3320 }
3321 if (raidPtrs[raidID]->valid != 0) {
3322
3323 /*
3324 Nope... Go looking for an alternative...
3325 Start high so we don't immediately use raid0 if that's
3326 not taken.
3327 */
3328
3329 for(raidID = numraid - 1; raidID >= 0; raidID--) {
3330 if (raidPtrs[raidID]->valid == 0) {
3331 /* can use this one! */
3332 break;
3333 }
3334 }
3335 }
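	/*
	 * For example (hypothetically): if these components were last
	 * configured as raid2 but raid2 is already valid, the loop above
	 * walks down from raid(numraid - 1) and settles on the first
	 * unconfigured unit it finds; raidID goes negative only if every
	 * unit is already taken.
	 */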
3336
3337 if (raidID < 0) {
3338 /* punt... */
3339 printf("Unable to auto configure this set!\n");
3340 printf("(Out of RAID devs!)\n");
3341 		free(config, M_RAIDFRAME);
		return(1);
3342 }
3343
3344 #if DEBUG
3345 printf("Configuring raid%d:\n",raidID);
3346 #endif
3347
3348 raidPtr = raidPtrs[raidID];
3349
3350 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3351 raidPtr->raidid = raidID;
3352 raidPtr->openings = RAIDOUTSTANDING;
3353
3354 /* 3. Build the configuration structure */
3355 rf_create_configuration(cset->ac, config, raidPtr);
3356
3357 /* 4. Do the configuration */
3358 retcode = rf_Configure(raidPtr, config, cset->ac);
3359
3360 if (retcode == 0) {
3361
3362 raidinit(raidPtrs[raidID]);
3363
3364 rf_markalldirty(raidPtrs[raidID]);
3365 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3366 if (cset->ac->clabel->root_partition==1) {
3367 /* everything configured just fine. Make a note
3368 that this set is eligible to be root. */
3369 cset->rootable = 1;
3370 /* XXX do this here? */
3371 raidPtrs[raidID]->root_partition = 1;
3372 }
3373 }
3374
3375 /* 5. Cleanup */
3376 free(config, M_RAIDFRAME);
3377
3378 *unit = raidID;
3379 return(retcode);
3380 }
3381
3382 void
3383 rf_disk_unbusy(desc)
3384 RF_RaidAccessDesc_t *desc;
3385 {
3386 struct buf *bp;
3387
3388 bp = (struct buf *)desc->bp;
3389 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3390 (bp->b_bcount - bp->b_resid));
3391 }
3392