1 /* $NetBSD: rf_netbsdkintf.c,v 1.101 2000/12/05 01:35:56 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136 #include <sys/reboot.h>
137
138 #include "raid.h"
139 #include "opt_raid_autoconfig.h"
140 #include "rf_raid.h"
141 #include "rf_raidframe.h"
142 #include "rf_copyback.h"
143 #include "rf_dag.h"
144 #include "rf_dagflags.h"
145 #include "rf_desc.h"
146 #include "rf_diskqueue.h"
147 #include "rf_acctrace.h"
148 #include "rf_etimer.h"
149 #include "rf_general.h"
150 #include "rf_debugMem.h"
151 #include "rf_kintf.h"
152 #include "rf_options.h"
153 #include "rf_driver.h"
154 #include "rf_parityscan.h"
155 #include "rf_debugprint.h"
156 #include "rf_threadstuff.h"
157 #include "rf_configure.h"
158
159 int rf_kdebug_level = 0;
160
161 #ifdef DEBUG
162 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
163 #else /* DEBUG */
164 #define db1_printf(a) { }
165 #endif /* DEBUG */
166
167 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
168
169 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
170
171 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
172 * spare table */
173 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
174 * installation process */
175
176 /* prototypes */
177 static void KernelWakeupFunc(struct buf * bp);
178 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
179 dev_t dev, RF_SectorNum_t startSect,
180 RF_SectorCount_t numSect, caddr_t buf,
181 void (*cbFunc) (struct buf *), void *cbArg,
182 int logBytesPerSector, struct proc * b_proc);
183 static void raidinit __P((RF_Raid_t *));
184
185 void raidattach __P((int));
186 int raidsize __P((dev_t));
187 int raidopen __P((dev_t, int, int, struct proc *));
188 int raidclose __P((dev_t, int, int, struct proc *));
189 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
190 int raidwrite __P((dev_t, struct uio *, int));
191 int raidread __P((dev_t, struct uio *, int));
192 void raidstrategy __P((struct buf *));
193 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
194
195 /*
196 * Pilfered from ccd.c
197 */
198
199 struct raidbuf {
200 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
201 struct buf *rf_obp; /* ptr. to original I/O buf */
202 int rf_flags; /* misc. flags */
203 RF_DiskQueueData_t *req;/* the request that this was part of.. */
204 };
205
206
207 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
208 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
209
210 /* XXX Not sure if the following should be replacing the raidPtrs above,
211 or if it should be used in conjunction with that...
212 */
213
214 struct raid_softc {
215 int sc_flags; /* flags */
216 int sc_cflags; /* configuration flags */
217 size_t sc_size; /* size of the raid device */
218 char sc_xname[20]; /* XXX external name */
219 struct disk sc_dkdev; /* generic disk device info */
220 struct pool sc_cbufpool; /* component buffer pool */
221 struct buf_queue buf_queue; /* used for the device queue */
222 };
223 /* sc_flags */
224 #define RAIDF_INITED 0x01 /* unit has been initialized */
225 #define RAIDF_WLABEL 0x02 /* label area is writable */
226 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
227 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
228 #define RAIDF_LOCKED 0x80 /* unit is locked */
229
230 #define raidunit(x) DISKUNIT(x)
231 int numraid = 0;
232
233 /*
234 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
235 * Be aware that large numbers can allow the driver to consume a lot of
236 * kernel memory, especially on writes, and in degraded mode reads.
237 *
238 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
239 * a single 64K write will typically require 64K for the old data,
240 * 64K for the old parity, and 64K for the new parity, for a total
241 * of 192K (if the parity buffer is not re-used immediately).
242 * Even if it is used immediately, that's still 128K, which when multiplied
243 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
244 *
245 * Now in degraded mode, for example, a 64K read on the above setup may
246 * require data reconstruction, which will require *all* of the 4 remaining
247 * disks to participate -- 4 * 32K/disk == 128K again.
248 */
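/*
 * Since the definition below is wrapped in #ifndef, a different default
 * can be supplied as a compile-time define when building the kernel if
 * the memory trade-off described above is a concern.
 */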
249
250 #ifndef RAIDOUTSTANDING
251 #define RAIDOUTSTANDING 6
252 #endif
253
254 #define RAIDLABELDEV(dev) \
255 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
256
257 /* declared here, and made public, for the benefit of KVM stuff.. */
258 struct raid_softc *raid_softc;
259
260 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
261 struct disklabel *));
262 static void raidgetdisklabel __P((dev_t));
263 static void raidmakedisklabel __P((struct raid_softc *));
264
265 static int raidlock __P((struct raid_softc *));
266 static void raidunlock __P((struct raid_softc *));
267
268 static void rf_markalldirty __P((RF_Raid_t *));
269 void rf_mountroot_hook __P((struct device *));
270
271 struct device *raidrootdev;
272
273 void rf_ReconThread __P((struct rf_recon_req *));
274 /* XXX what I want is: */
275 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
276 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
277 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
278 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
279 void rf_buildroothack __P((void *));
280
281 RF_AutoConfig_t *rf_find_raid_components __P((void));
282 RF_ConfigSet_t *rf_create_auto_sets __P((RF_AutoConfig_t *));
283 static int rf_does_it_fit __P((RF_ConfigSet_t *,RF_AutoConfig_t *));
284 static int rf_reasonable_label __P((RF_ComponentLabel_t *));
285 void rf_create_configuration __P((RF_AutoConfig_t *,RF_Config_t *,
286 RF_Raid_t *));
287 int rf_set_autoconfig __P((RF_Raid_t *, int));
288 int rf_set_rootpartition __P((RF_Raid_t *, int));
289 void rf_release_all_vps __P((RF_ConfigSet_t *));
290 void rf_cleanup_config_set __P((RF_ConfigSet_t *));
291 int rf_have_enough_components __P((RF_ConfigSet_t *));
292 int rf_auto_config_set __P((RF_ConfigSet_t *, int *));
293
294 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
295 allow autoconfig to take place.
296 Note that this is overridden by having
297 RAID_AUTOCONFIG as an option in the
298 kernel config file. */
299
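/*
 * raidattach -- pseudo-device attach routine.  Allocates the per-unit
 * softc and RAIDframe descriptors for the requested number of units,
 * boots RAIDframe, and, if autoconfiguration is enabled, locates RAID
 * components on the system and schedules rf_buildroothack() to
 * configure the complete sets.
 */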
300 void
301 raidattach(num)
302 int num;
303 {
304 int raidID;
305 int i, rc;
306 RF_AutoConfig_t *ac_list; /* autoconfig list */
307 RF_ConfigSet_t *config_sets;
308
309 #ifdef DEBUG
310 printf("raidattach: Asked for %d units\n", num);
311 #endif
312
313 if (num <= 0) {
314 #ifdef DIAGNOSTIC
315 panic("raidattach: count <= 0");
316 #endif
317 return;
318 }
319 /* This is where all the initialization stuff gets done. */
320
321 numraid = num;
322
323 /* Make some space for requested number of units... */
324
325 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
326 if (raidPtrs == NULL) {
327 panic("raidPtrs is NULL!!\n");
328 }
329
330 rc = rf_mutex_init(&rf_sparet_wait_mutex);
331 if (rc) {
332 RF_PANIC();
333 }
334
335 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
336
337 for (i = 0; i < num; i++)
338 raidPtrs[i] = NULL;
339 rc = rf_BootRaidframe();
340 if (rc == 0)
341 printf("Kernelized RAIDframe activated\n");
342 else
343 panic("Serious error booting RAID!!\n");
344
345 /* put together some data structures like the CCD device does. This
346 * lets us lock the device and what-not when it gets opened. */
347
348 raid_softc = (struct raid_softc *)
349 malloc(num * sizeof(struct raid_softc),
350 M_RAIDFRAME, M_NOWAIT);
351 if (raid_softc == NULL) {
352 printf("WARNING: no memory for RAIDframe driver\n");
353 return;
354 }
355
356 bzero(raid_softc, num * sizeof(struct raid_softc));
357
358 raidrootdev = (struct device *)malloc(num * sizeof(struct device),
359 M_RAIDFRAME, M_NOWAIT);
360 if (raidrootdev == NULL) {
361 panic("No memory for RAIDframe driver!!?!?!\n");
362 }
363
364 for (raidID = 0; raidID < num; raidID++) {
365 BUFQ_INIT(&raid_softc[raidID].buf_queue);
366
367 raidrootdev[raidID].dv_class = DV_DISK;
368 raidrootdev[raidID].dv_cfdata = NULL;
369 raidrootdev[raidID].dv_unit = raidID;
370 raidrootdev[raidID].dv_parent = NULL;
371 raidrootdev[raidID].dv_flags = 0;
372 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
373
374 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
375 (RF_Raid_t *));
376 if (raidPtrs[raidID] == NULL) {
377 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
378 numraid = raidID;
379 return;
380 }
381 }
382
383 #if RAID_AUTOCONFIG
384 raidautoconfig = 1;
385 #endif
386
387 if (raidautoconfig) {
388 /* 1. locate all RAID components on the system */
389
390 #if DEBUG
391 printf("Searching for raid components...\n");
392 #endif
393 ac_list = rf_find_raid_components();
394
395 /* 2. sort them into their respective sets */
396
397 config_sets = rf_create_auto_sets(ac_list);
398
399 /* 3. evaluate each set and configure the valid ones
400 This gets done in rf_buildroothack() */
401
402 /* schedule the creation of the thread to do the
403 "/ on RAID" stuff */
404
405 kthread_create(rf_buildroothack,config_sets);
406
407 #if 0
408 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
409 #endif
410 }
411
412 }
413
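/*
 * rf_buildroothack -- kernel thread entry point.  Walks the configuration
 * sets found at attach time, autoconfigures the complete sets that are
 * marked for autoconfiguration, and releases the resources of the rest.
 * If exactly one autoconfigured set claims root, it is nominated as
 * booted_device; if more than one does, the user is asked to choose.
 */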
414 void
415 rf_buildroothack(arg)
416 void *arg;
417 {
418 RF_ConfigSet_t *config_sets = arg;
419 RF_ConfigSet_t *cset;
420 RF_ConfigSet_t *next_cset;
421 int retcode;
422 int raidID;
423 int rootID;
424 int num_root;
425
426 rootID = 0;
427 num_root = 0;
428 cset = config_sets;
429 while(cset != NULL ) {
430 next_cset = cset->next;
431 if (rf_have_enough_components(cset) &&
432 cset->ac->clabel->autoconfigure==1) {
433 retcode = rf_auto_config_set(cset,&raidID);
434 if (!retcode) {
435 if (cset->rootable) {
436 rootID = raidID;
437 num_root++;
438 }
439 } else {
440 /* The autoconfig didn't work :( */
441 #if DEBUG
442 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
443 #endif
444 rf_release_all_vps(cset);
445 }
446 } else {
447 /* we're not autoconfiguring this set...
448 release the associated resources */
449 rf_release_all_vps(cset);
450 }
451 /* cleanup */
452 rf_cleanup_config_set(cset);
453 cset = next_cset;
454 }
455 if (boothowto & RB_ASKNAME) {
456 /* We don't auto-config... */
457 } else {
458 /* They didn't ask, and we found something bootable... */
459
460 if (num_root == 1) {
461 booted_device = &raidrootdev[rootID];
462 } else if (num_root > 1) {
463 /* we can't guess.. require the user to answer... */
464 boothowto |= RB_ASKNAME;
465 }
466 }
467 }
468
469
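/*
 * raidsize -- return the size, in DEV_BSIZE blocks, of the indicated
 * partition.  Returns -1 if the unit is not configured or the partition
 * is not of type FS_SWAP.
 */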
470 int
471 raidsize(dev)
472 dev_t dev;
473 {
474 struct raid_softc *rs;
475 struct disklabel *lp;
476 int part, unit, omask, size;
477
478 unit = raidunit(dev);
479 if (unit >= numraid)
480 return (-1);
481 rs = &raid_softc[unit];
482
483 if ((rs->sc_flags & RAIDF_INITED) == 0)
484 return (-1);
485
486 part = DISKPART(dev);
487 omask = rs->sc_dkdev.dk_openmask & (1 << part);
488 lp = rs->sc_dkdev.dk_label;
489
490 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
491 return (-1);
492
493 if (lp->d_partitions[part].p_fstype != FS_SWAP)
494 size = -1;
495 else
496 size = lp->d_partitions[part].p_size *
497 (lp->d_secsize / DEV_BSIZE);
498
499 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
500 return (-1);
501
502 return (size);
503
504 }
505
506 int
507 raiddump(dev, blkno, va, size)
508 dev_t dev;
509 daddr_t blkno;
510 caddr_t va;
511 size_t size;
512 {
513 /* Not implemented. */
514 return ENXIO;
515 }
516 /* ARGSUSED */
517 int
518 raidopen(dev, flags, fmt, p)
519 dev_t dev;
520 int flags, fmt;
521 struct proc *p;
522 {
523 int unit = raidunit(dev);
524 struct raid_softc *rs;
525 struct disklabel *lp;
526 int part, pmask;
527 int error = 0;
528
529 if (unit >= numraid)
530 return (ENXIO);
531 rs = &raid_softc[unit];
532
533 if ((error = raidlock(rs)) != 0)
534 return (error);
535 lp = rs->sc_dkdev.dk_label;
536
537 part = DISKPART(dev);
538 pmask = (1 << part);
539
540 db1_printf(("Opening raid device number: %d partition: %d\n",
541 unit, part));
542
543
544 if ((rs->sc_flags & RAIDF_INITED) &&
545 (rs->sc_dkdev.dk_openmask == 0))
546 raidgetdisklabel(dev);
547
548 /* make sure that this partition exists */
549
550 if (part != RAW_PART) {
551 db1_printf(("Not a raw partition..\n"));
552 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
553 ((part >= lp->d_npartitions) ||
554 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
555 error = ENXIO;
556 raidunlock(rs);
557 db1_printf(("Bailing out...\n"));
558 return (error);
559 }
560 }
561 /* Prevent this unit from being unconfigured while open. */
562 switch (fmt) {
563 case S_IFCHR:
564 rs->sc_dkdev.dk_copenmask |= pmask;
565 break;
566
567 case S_IFBLK:
568 rs->sc_dkdev.dk_bopenmask |= pmask;
569 break;
570 }
571
572 if ((rs->sc_dkdev.dk_openmask == 0) &&
573 ((rs->sc_flags & RAIDF_INITED) != 0)) {
574 /* First one... mark things as dirty... Note that we *MUST*
575 have done a configure before this. I DO NOT WANT TO BE
576 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
577 THAT THEY BELONG TOGETHER!!!!! */
578 /* XXX should check to see if we're only open for reading
579 here... If so, we needn't do this, but then need some
580 other way of keeping track of what's happened.. */
581
582 rf_markalldirty( raidPtrs[unit] );
583 }
584
585
586 rs->sc_dkdev.dk_openmask =
587 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
588
589 raidunlock(rs);
590
591 return (error);
592
593
594 }
595 /* ARGSUSED */
596 int
597 raidclose(dev, flags, fmt, p)
598 dev_t dev;
599 int flags, fmt;
600 struct proc *p;
601 {
602 int unit = raidunit(dev);
603 struct raid_softc *rs;
604 int error = 0;
605 int part;
606
607 if (unit >= numraid)
608 return (ENXIO);
609 rs = &raid_softc[unit];
610
611 if ((error = raidlock(rs)) != 0)
612 return (error);
613
614 part = DISKPART(dev);
615
616 /* ...that much closer to allowing unconfiguration... */
617 switch (fmt) {
618 case S_IFCHR:
619 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
620 break;
621
622 case S_IFBLK:
623 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
624 break;
625 }
626 rs->sc_dkdev.dk_openmask =
627 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
628
629 if ((rs->sc_dkdev.dk_openmask == 0) &&
630 ((rs->sc_flags & RAIDF_INITED) != 0)) {
631 /* Last one... device is not unconfigured yet.
632 Device shutdown has taken care of setting the
633 clean bits if RAIDF_INITED is not set;
634 mark things as clean... */
635 #if 0
636 printf("Last one on raid%d. Updating status.\n",unit);
637 #endif
638 rf_update_component_labels(raidPtrs[unit],
639 RF_FINAL_COMPONENT_UPDATE);
640 }
641
642 raidunlock(rs);
643 return (0);
644
645 }
646
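/*
 * raidstrategy -- queue an incoming buf for the unit.  After checking
 * that the unit is configured and bounds-checking the transfer against
 * the disklabel, the buf is placed on the softc's queue and raidstart()
 * is called to push it into RAIDframe.
 */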
647 void
648 raidstrategy(bp)
649 struct buf *bp;
650 {
651 int s;
652
653 unsigned int raidID = raidunit(bp->b_dev);
654 RF_Raid_t *raidPtr;
655 struct raid_softc *rs = &raid_softc[raidID];
656 struct disklabel *lp;
657 int wlabel;
658
659 if ((rs->sc_flags & RAIDF_INITED) ==0) {
660 bp->b_error = ENXIO;
661 bp->b_flags |= B_ERROR;
662 bp->b_resid = bp->b_bcount;
663 biodone(bp);
664 return;
665 }
666 if (raidID >= numraid || !raidPtrs[raidID]) {
667 bp->b_error = ENODEV;
668 bp->b_flags |= B_ERROR;
669 bp->b_resid = bp->b_bcount;
670 biodone(bp);
671 return;
672 }
673 raidPtr = raidPtrs[raidID];
674 if (!raidPtr->valid) {
675 bp->b_error = ENODEV;
676 bp->b_flags |= B_ERROR;
677 bp->b_resid = bp->b_bcount;
678 biodone(bp);
679 return;
680 }
681 if (bp->b_bcount == 0) {
682 db1_printf(("b_bcount is zero..\n"));
683 biodone(bp);
684 return;
685 }
686 lp = rs->sc_dkdev.dk_label;
687
688 /*
689 * Do bounds checking and adjust transfer. If there's an
690 * error, the bounds check will flag that for us.
691 */
692
693 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
694 if (DISKPART(bp->b_dev) != RAW_PART)
695 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
696 db1_printf(("Bounds check failed!!:%d %d\n",
697 (int) bp->b_blkno, (int) wlabel));
698 biodone(bp);
699 return;
700 }
701 s = splbio();
702
703 bp->b_resid = 0;
704
705 /* stuff it onto our queue */
706 BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
707
708 raidstart(raidPtrs[raidID]);
709
710 splx(s);
711 }
712 /* ARGSUSED */
713 int
714 raidread(dev, uio, flags)
715 dev_t dev;
716 struct uio *uio;
717 int flags;
718 {
719 int unit = raidunit(dev);
720 struct raid_softc *rs;
721 int part;
722
723 if (unit >= numraid)
724 return (ENXIO);
725 rs = &raid_softc[unit];
726
727 if ((rs->sc_flags & RAIDF_INITED) == 0)
728 return (ENXIO);
729 part = DISKPART(dev);
730
731 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
732
733 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
734
735 }
736 /* ARGSUSED */
737 int
738 raidwrite(dev, uio, flags)
739 dev_t dev;
740 struct uio *uio;
741 int flags;
742 {
743 int unit = raidunit(dev);
744 struct raid_softc *rs;
745
746 if (unit >= numraid)
747 return (ENXIO);
748 rs = &raid_softc[unit];
749
750 if ((rs->sc_flags & RAIDF_INITED) == 0)
751 return (ENXIO);
752 db1_printf(("raidwrite\n"));
753 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
754
755 }
756
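/*
 * raidioctl -- handle both the RAIDframe-specific ioctls (configuration,
 * component labels, reconstruction, parity rewrite, status queries) and
 * the standard disk ioctls (disklabel handling).
 */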
757 int
758 raidioctl(dev, cmd, data, flag, p)
759 dev_t dev;
760 u_long cmd;
761 caddr_t data;
762 int flag;
763 struct proc *p;
764 {
765 int unit = raidunit(dev);
766 int error = 0;
767 int part, pmask;
768 struct raid_softc *rs;
769 RF_Config_t *k_cfg, *u_cfg;
770 RF_Raid_t *raidPtr;
771 RF_RaidDisk_t *diskPtr;
772 RF_AccTotals_t *totals;
773 RF_DeviceConfig_t *d_cfg, **ucfgp;
774 u_char *specific_buf;
775 int retcode = 0;
776 int row;
777 int column;
778 struct rf_recon_req *rrcopy, *rr;
779 RF_ComponentLabel_t *clabel;
780 RF_ComponentLabel_t ci_label;
781 RF_ComponentLabel_t **clabel_ptr;
782 RF_SingleComponent_t *sparePtr,*componentPtr;
783 RF_SingleComponent_t hot_spare;
784 RF_SingleComponent_t component;
785 RF_ProgressInfo_t progressInfo, **progressInfoPtr;
786 int i, j, d;
787
788 if (unit >= numraid)
789 return (ENXIO);
790 rs = &raid_softc[unit];
791 raidPtr = raidPtrs[unit];
792
793 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
794 (int) DISKPART(dev), (int) unit, (int) cmd));
795
796 /* Must be open for writes for these commands... */
797 switch (cmd) {
798 case DIOCSDINFO:
799 case DIOCWDINFO:
800 case DIOCWLABEL:
801 if ((flag & FWRITE) == 0)
802 return (EBADF);
803 }
804
805 /* Must be initialized for these... */
806 switch (cmd) {
807 case DIOCGDINFO:
808 case DIOCSDINFO:
809 case DIOCWDINFO:
810 case DIOCGPART:
811 case DIOCWLABEL:
812 case DIOCGDEFLABEL:
813 case RAIDFRAME_SHUTDOWN:
814 case RAIDFRAME_REWRITEPARITY:
815 case RAIDFRAME_GET_INFO:
816 case RAIDFRAME_RESET_ACCTOTALS:
817 case RAIDFRAME_GET_ACCTOTALS:
818 case RAIDFRAME_KEEP_ACCTOTALS:
819 case RAIDFRAME_GET_SIZE:
820 case RAIDFRAME_FAIL_DISK:
821 case RAIDFRAME_COPYBACK:
822 case RAIDFRAME_CHECK_RECON_STATUS:
823 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
824 case RAIDFRAME_GET_COMPONENT_LABEL:
825 case RAIDFRAME_SET_COMPONENT_LABEL:
826 case RAIDFRAME_ADD_HOT_SPARE:
827 case RAIDFRAME_REMOVE_HOT_SPARE:
828 case RAIDFRAME_INIT_LABELS:
829 case RAIDFRAME_REBUILD_IN_PLACE:
830 case RAIDFRAME_CHECK_PARITY:
831 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
832 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
833 case RAIDFRAME_CHECK_COPYBACK_STATUS:
834 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
835 case RAIDFRAME_SET_AUTOCONFIG:
836 case RAIDFRAME_SET_ROOT:
837 case RAIDFRAME_DELETE_COMPONENT:
838 case RAIDFRAME_INCORPORATE_HOT_SPARE:
839 if ((rs->sc_flags & RAIDF_INITED) == 0)
840 return (ENXIO);
841 }
842
843 switch (cmd) {
844
845 /* configure the system */
846 case RAIDFRAME_CONFIGURE:
847
848 if (raidPtr->valid) {
849 /* There is a valid RAID set running on this unit! */
850 printf("raid%d: Device already configured!\n",unit);
851 return(EINVAL);
852 }
853
854 /* copy-in the configuration information */
855 /* data points to a pointer to the configuration structure */
856
857 u_cfg = *((RF_Config_t **) data);
858 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
859 if (k_cfg == NULL) {
860 return (ENOMEM);
861 }
862 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
863 sizeof(RF_Config_t));
864 if (retcode) {
865 RF_Free(k_cfg, sizeof(RF_Config_t));
866 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
867 retcode));
868 return (retcode);
869 }
870 /* allocate a buffer for the layout-specific data, and copy it
871 * in */
872 if (k_cfg->layoutSpecificSize) {
873 if (k_cfg->layoutSpecificSize > 10000) {
874 /* sanity check */
875 RF_Free(k_cfg, sizeof(RF_Config_t));
876 return (EINVAL);
877 }
878 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
879 (u_char *));
880 if (specific_buf == NULL) {
881 RF_Free(k_cfg, sizeof(RF_Config_t));
882 return (ENOMEM);
883 }
884 retcode = copyin(k_cfg->layoutSpecific,
885 (caddr_t) specific_buf,
886 k_cfg->layoutSpecificSize);
887 if (retcode) {
888 RF_Free(k_cfg, sizeof(RF_Config_t));
889 RF_Free(specific_buf,
890 k_cfg->layoutSpecificSize);
891 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
892 retcode));
893 return (retcode);
894 }
895 } else
896 specific_buf = NULL;
897 k_cfg->layoutSpecific = specific_buf;
898
899 /* should do some kind of sanity check on the configuration.
900 * Store the sum of all the bytes in the last byte? */
901
902 /* configure the system */
903
904 /*
905 * Clear the entire RAID descriptor, just to make sure
906 * there is no stale data left in the case of a
907 * reconfiguration
908 */
909 bzero((char *) raidPtr, sizeof(RF_Raid_t));
910 raidPtr->raidid = unit;
911
912 retcode = rf_Configure(raidPtr, k_cfg, NULL);
913
914 if (retcode == 0) {
915
916 /* allow this many simultaneous IO's to
917 this RAID device */
918 raidPtr->openings = RAIDOUTSTANDING;
919
920 raidinit(raidPtr);
921 rf_markalldirty(raidPtr);
922 }
923 /* free the buffers. No return code here. */
924 if (k_cfg->layoutSpecificSize) {
925 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
926 }
927 RF_Free(k_cfg, sizeof(RF_Config_t));
928
929 return (retcode);
930
931 /* shutdown the system */
932 case RAIDFRAME_SHUTDOWN:
933
934 if ((error = raidlock(rs)) != 0)
935 return (error);
936
937 /*
938 * If somebody has a partition mounted, we shouldn't
939 * shutdown.
940 */
941
942 part = DISKPART(dev);
943 pmask = (1 << part);
944 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
945 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
946 (rs->sc_dkdev.dk_copenmask & pmask))) {
947 raidunlock(rs);
948 return (EBUSY);
949 }
950
951 retcode = rf_Shutdown(raidPtr);
952
953 pool_destroy(&rs->sc_cbufpool);
954
955 /* It's no longer initialized... */
956 rs->sc_flags &= ~RAIDF_INITED;
957
958 /* Detach the disk. */
959 disk_detach(&rs->sc_dkdev);
960
961 raidunlock(rs);
962
963 return (retcode);
964 case RAIDFRAME_GET_COMPONENT_LABEL:
965 clabel_ptr = (RF_ComponentLabel_t **) data;
966 /* need to read the component label for the disk indicated
967 by row,column in clabel */
968
969 /* For practice, let's get it directly from disk, rather
970 than from the in-core copy */
971 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
972 (RF_ComponentLabel_t *));
973 if (clabel == NULL)
974 return (ENOMEM);
975
976 bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
977
978 retcode = copyin( *clabel_ptr, clabel,
979 sizeof(RF_ComponentLabel_t));
980
981 if (retcode) {
982 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
983 return(retcode);
984 }
985
986 row = clabel->row;
987 column = clabel->column;
988
989 if ((row < 0) || (row >= raidPtr->numRow) ||
990 (column < 0) || (column >= raidPtr->numCol +
991 raidPtr->numSpare)) {
992 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
993 return(EINVAL);
994 }
995
996 raidread_component_label(raidPtr->Disks[row][column].dev,
997 raidPtr->raid_cinfo[row][column].ci_vp,
998 clabel );
999
1000 retcode = copyout((caddr_t) clabel,
1001 (caddr_t) *clabel_ptr,
1002 sizeof(RF_ComponentLabel_t));
1003 RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1004 return (retcode);
1005
1006 case RAIDFRAME_SET_COMPONENT_LABEL:
1007 clabel = (RF_ComponentLabel_t *) data;
1008
1009 /* XXX check the label for valid stuff... */
1010 /* Note that some things *should not* get modified --
1011 the user should be re-initing the labels instead of
1012 trying to patch things.
1013 */
1014
1015 printf("Got component label:\n");
1016 printf("Version: %d\n",clabel->version);
1017 printf("Serial Number: %d\n",clabel->serial_number);
1018 printf("Mod counter: %d\n",clabel->mod_counter);
1019 printf("Row: %d\n", clabel->row);
1020 printf("Column: %d\n", clabel->column);
1021 printf("Num Rows: %d\n", clabel->num_rows);
1022 printf("Num Columns: %d\n", clabel->num_columns);
1023 printf("Clean: %d\n", clabel->clean);
1024 printf("Status: %d\n", clabel->status);
1025
1026 row = clabel->row;
1027 column = clabel->column;
1028
1029 if ((row < 0) || (row >= raidPtr->numRow) ||
1030 (column < 0) || (column >= raidPtr->numCol)) {
1031 return(EINVAL);
1032 }
1033
1034 /* XXX this isn't allowed to do anything for now :-) */
1035
1036 /* XXX and before it is, we need to fill in the rest
1037 of the fields!?!?!?! */
1038 #if 0
1039 raidwrite_component_label(
1040 raidPtr->Disks[row][column].dev,
1041 raidPtr->raid_cinfo[row][column].ci_vp,
1042 clabel );
1043 #endif
1044 return (0);
1045
1046 case RAIDFRAME_INIT_LABELS:
1047 clabel = (RF_ComponentLabel_t *) data;
1048 /*
1049 we only want the serial number from
1050 the above. We get all the rest of the information
1051 from the config that was used to create this RAID
1052 set.
1053 */
1054
1055 raidPtr->serial_number = clabel->serial_number;
1056
1057 raid_init_component_label(raidPtr, &ci_label);
1058 ci_label.serial_number = clabel->serial_number;
1059
1060 for(row=0;row<raidPtr->numRow;row++) {
1061 ci_label.row = row;
1062 for(column=0;column<raidPtr->numCol;column++) {
1063 diskPtr = &raidPtr->Disks[row][column];
1064 if (!RF_DEAD_DISK(diskPtr->status)) {
1065 ci_label.partitionSize = diskPtr->partitionSize;
1066 ci_label.column = column;
1067 raidwrite_component_label(
1068 raidPtr->Disks[row][column].dev,
1069 raidPtr->raid_cinfo[row][column].ci_vp,
1070 &ci_label );
1071 }
1072 }
1073 }
1074
1075 return (retcode);
1076 case RAIDFRAME_SET_AUTOCONFIG:
1077 d = rf_set_autoconfig(raidPtr, *(int *) data);
1078 printf("New autoconfig value is: %d\n", d);
1079 *(int *) data = d;
1080 return (retcode);
1081
1082 case RAIDFRAME_SET_ROOT:
1083 d = rf_set_rootpartition(raidPtr, *(int *) data);
1084 printf("New rootpartition value is: %d\n", d);
1085 *(int *) data = d;
1086 return (retcode);
1087
1088 /* initialize all parity */
1089 case RAIDFRAME_REWRITEPARITY:
1090
1091 if (raidPtr->Layout.map->faultsTolerated == 0) {
1092 /* Parity for RAID 0 is trivially correct */
1093 raidPtr->parity_good = RF_RAID_CLEAN;
1094 return(0);
1095 }
1096
1097 if (raidPtr->parity_rewrite_in_progress == 1) {
1098 /* Re-write is already in progress! */
1099 return(EINVAL);
1100 }
1101
1102 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1103 rf_RewriteParityThread,
1104 raidPtr,"raid_parity");
1105 return (retcode);
1106
1107
1108 case RAIDFRAME_ADD_HOT_SPARE:
1109 sparePtr = (RF_SingleComponent_t *) data;
1110 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1111 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1112 return(retcode);
1113
1114 case RAIDFRAME_REMOVE_HOT_SPARE:
1115 return(retcode);
1116
1117 case RAIDFRAME_DELETE_COMPONENT:
1118 componentPtr = (RF_SingleComponent_t *)data;
1119 memcpy( &component, componentPtr,
1120 sizeof(RF_SingleComponent_t));
1121 retcode = rf_delete_component(raidPtr, &component);
1122 return(retcode);
1123
1124 case RAIDFRAME_INCORPORATE_HOT_SPARE:
1125 componentPtr = (RF_SingleComponent_t *)data;
1126 memcpy( &component, componentPtr,
1127 sizeof(RF_SingleComponent_t));
1128 retcode = rf_incorporate_hot_spare(raidPtr, &component);
1129 return(retcode);
1130
1131 case RAIDFRAME_REBUILD_IN_PLACE:
1132
1133 if (raidPtr->Layout.map->faultsTolerated == 0) {
1134 /* Can't do this on a RAID 0!! */
1135 return(EINVAL);
1136 }
1137
1138 if (raidPtr->recon_in_progress == 1) {
1139 /* a reconstruct is already in progress! */
1140 return(EINVAL);
1141 }
1142
1143 componentPtr = (RF_SingleComponent_t *) data;
1144 memcpy( &component, componentPtr,
1145 sizeof(RF_SingleComponent_t));
1146 row = component.row;
1147 column = component.column;
1148 printf("Rebuild: %d %d\n",row, column);
1149 if ((row < 0) || (row >= raidPtr->numRow) ||
1150 (column < 0) || (column >= raidPtr->numCol)) {
1151 return(EINVAL);
1152 }
1153
1154 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1155 if (rrcopy == NULL)
1156 return(ENOMEM);
1157
1158 rrcopy->raidPtr = (void *) raidPtr;
1159 rrcopy->row = row;
1160 rrcopy->col = column;
1161
1162 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1163 rf_ReconstructInPlaceThread,
1164 rrcopy,"raid_reconip");
1165 return(retcode);
1166
1167 case RAIDFRAME_GET_INFO:
1168 if (!raidPtr->valid)
1169 return (ENODEV);
1170 ucfgp = (RF_DeviceConfig_t **) data;
1171 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1172 (RF_DeviceConfig_t *));
1173 if (d_cfg == NULL)
1174 return (ENOMEM);
1175 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1176 d_cfg->rows = raidPtr->numRow;
1177 d_cfg->cols = raidPtr->numCol;
1178 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1179 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1180 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1181 return (ENOMEM);
1182 }
1183 d_cfg->nspares = raidPtr->numSpare;
1184 if (d_cfg->nspares >= RF_MAX_DISKS) {
1185 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1186 return (ENOMEM);
1187 }
1188 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1189 d = 0;
1190 for (i = 0; i < d_cfg->rows; i++) {
1191 for (j = 0; j < d_cfg->cols; j++) {
1192 d_cfg->devs[d] = raidPtr->Disks[i][j];
1193 d++;
1194 }
1195 }
1196 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1197 d_cfg->spares[i] = raidPtr->Disks[0][j];
1198 }
1199 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1200 sizeof(RF_DeviceConfig_t));
1201 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1202
1203 return (retcode);
1204
1205 case RAIDFRAME_CHECK_PARITY:
1206 *(int *) data = raidPtr->parity_good;
1207 return (0);
1208
1209 case RAIDFRAME_RESET_ACCTOTALS:
1210 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1211 return (0);
1212
1213 case RAIDFRAME_GET_ACCTOTALS:
1214 totals = (RF_AccTotals_t *) data;
1215 *totals = raidPtr->acc_totals;
1216 return (0);
1217
1218 case RAIDFRAME_KEEP_ACCTOTALS:
1219 raidPtr->keep_acc_totals = *(int *)data;
1220 return (0);
1221
1222 case RAIDFRAME_GET_SIZE:
1223 *(int *) data = raidPtr->totalSectors;
1224 return (0);
1225
1226 /* fail a disk & optionally start reconstruction */
1227 case RAIDFRAME_FAIL_DISK:
1228
1229 if (raidPtr->Layout.map->faultsTolerated == 0) {
1230 /* Can't do this on a RAID 0!! */
1231 return(EINVAL);
1232 }
1233
1234 rr = (struct rf_recon_req *) data;
1235
1236 if (rr->row < 0 || rr->row >= raidPtr->numRow
1237 || rr->col < 0 || rr->col >= raidPtr->numCol)
1238 return (EINVAL);
1239
1240 printf("raid%d: Failing the disk: row: %d col: %d\n",
1241 unit, rr->row, rr->col);
1242
1243 /* make a copy of the recon request so that we don't rely on
1244 * the user's buffer */
1245 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1246 if (rrcopy == NULL)
1247 return(ENOMEM);
1248 bcopy(rr, rrcopy, sizeof(*rr));
1249 rrcopy->raidPtr = (void *) raidPtr;
1250
1251 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1252 rf_ReconThread,
1253 rrcopy,"raid_recon");
1254 return (0);
1255
1256 /* invoke a copyback operation after recon on whatever disk
1257 * needs it, if any */
1258 case RAIDFRAME_COPYBACK:
1259
1260 if (raidPtr->Layout.map->faultsTolerated == 0) {
1261 /* This makes no sense on a RAID 0!! */
1262 return(EINVAL);
1263 }
1264
1265 if (raidPtr->copyback_in_progress == 1) {
1266 /* Copyback is already in progress! */
1267 return(EINVAL);
1268 }
1269
1270 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1271 rf_CopybackThread,
1272 raidPtr,"raid_copyback");
1273 return (retcode);
1274
1275 /* return the percentage completion of reconstruction */
1276 case RAIDFRAME_CHECK_RECON_STATUS:
1277 if (raidPtr->Layout.map->faultsTolerated == 0) {
1278 /* This makes no sense on a RAID 0, so tell the
1279 user it's done. */
1280 *(int *) data = 100;
1281 return(0);
1282 }
1283 row = 0; /* XXX we only consider a single row... */
1284 if (raidPtr->status[row] != rf_rs_reconstructing)
1285 *(int *) data = 100;
1286 else
1287 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1288 return (0);
1289 case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1290 progressInfoPtr = (RF_ProgressInfo_t **) data;
1291 row = 0; /* XXX we only consider a single row... */
1292 if (raidPtr->status[row] != rf_rs_reconstructing) {
1293 progressInfo.remaining = 0;
1294 progressInfo.completed = 100;
1295 progressInfo.total = 100;
1296 } else {
1297 progressInfo.total =
1298 raidPtr->reconControl[row]->numRUsTotal;
1299 progressInfo.completed =
1300 raidPtr->reconControl[row]->numRUsComplete;
1301 progressInfo.remaining = progressInfo.total -
1302 progressInfo.completed;
1303 }
1304 retcode = copyout((caddr_t) &progressInfo,
1305 (caddr_t) *progressInfoPtr,
1306 sizeof(RF_ProgressInfo_t));
1307 return (retcode);
1308
1309 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1310 if (raidPtr->Layout.map->faultsTolerated == 0) {
1311 /* This makes no sense on a RAID 0, so tell the
1312 user it's done. */
1313 *(int *) data = 100;
1314 return(0);
1315 }
1316 if (raidPtr->parity_rewrite_in_progress == 1) {
1317 *(int *) data = 100 *
1318 raidPtr->parity_rewrite_stripes_done /
1319 raidPtr->Layout.numStripe;
1320 } else {
1321 *(int *) data = 100;
1322 }
1323 return (0);
1324
1325 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1326 progressInfoPtr = (RF_ProgressInfo_t **) data;
1327 if (raidPtr->parity_rewrite_in_progress == 1) {
1328 progressInfo.total = raidPtr->Layout.numStripe;
1329 progressInfo.completed =
1330 raidPtr->parity_rewrite_stripes_done;
1331 progressInfo.remaining = progressInfo.total -
1332 progressInfo.completed;
1333 } else {
1334 progressInfo.remaining = 0;
1335 progressInfo.completed = 100;
1336 progressInfo.total = 100;
1337 }
1338 retcode = copyout((caddr_t) &progressInfo,
1339 (caddr_t) *progressInfoPtr,
1340 sizeof(RF_ProgressInfo_t));
1341 return (retcode);
1342
1343 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1344 if (raidPtr->Layout.map->faultsTolerated == 0) {
1345 /* This makes no sense on a RAID 0 */
1346 *(int *) data = 100;
1347 return(0);
1348 }
1349 if (raidPtr->copyback_in_progress == 1) {
1350 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1351 raidPtr->Layout.numStripe;
1352 } else {
1353 *(int *) data = 100;
1354 }
1355 return (0);
1356
1357 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1358 progressInfoPtr = (RF_ProgressInfo_t **) data;
1359 if (raidPtr->copyback_in_progress == 1) {
1360 progressInfo.total = raidPtr->Layout.numStripe;
1361 progressInfo.completed =
1362 raidPtr->copyback_stripes_done;
1363 progressInfo.remaining = progressInfo.total -
1364 progressInfo.completed;
1365 } else {
1366 progressInfo.remaining = 0;
1367 progressInfo.completed = 100;
1368 progressInfo.total = 100;
1369 }
1370 retcode = copyout((caddr_t) &progressInfo,
1371 (caddr_t) *progressInfoPtr,
1372 sizeof(RF_ProgressInfo_t));
1373 return (retcode);
1374
1375 /* the sparetable daemon calls this to wait for the kernel to
1376 * need a spare table. this ioctl does not return until a
1377 * spare table is needed. XXX -- calling mpsleep here in the
1378 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1379 * -- I should either compute the spare table in the kernel,
1380 * or have a different -- XXX XXX -- interface (a different
1381 * character device) for delivering the table -- XXX */
1382 #if 0
1383 case RAIDFRAME_SPARET_WAIT:
1384 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1385 while (!rf_sparet_wait_queue)
1386 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1387 waitreq = rf_sparet_wait_queue;
1388 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1389 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1390
1391 /* structure assignment */
1392 *((RF_SparetWait_t *) data) = *waitreq;
1393
1394 RF_Free(waitreq, sizeof(*waitreq));
1395 return (0);
1396
1397 /* wakes up a process waiting on SPARET_WAIT and puts an error
1398 * code in it that will cause the daemon to exit */
1399 case RAIDFRAME_ABORT_SPARET_WAIT:
1400 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1401 waitreq->fcol = -1;
1402 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1403 waitreq->next = rf_sparet_wait_queue;
1404 rf_sparet_wait_queue = waitreq;
1405 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1406 wakeup(&rf_sparet_wait_queue);
1407 return (0);
1408
1409 /* used by the spare table daemon to deliver a spare table
1410 * into the kernel */
1411 case RAIDFRAME_SEND_SPARET:
1412
1413 /* install the spare table */
1414 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1415
1416 /* respond to the requestor. the return status of the spare
1417 * table installation is passed in the "fcol" field */
1418 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1419 waitreq->fcol = retcode;
1420 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1421 waitreq->next = rf_sparet_resp_queue;
1422 rf_sparet_resp_queue = waitreq;
1423 wakeup(&rf_sparet_resp_queue);
1424 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1425
1426 return (retcode);
1427 #endif
1428
1429 default:
1430 break; /* fall through to the os-specific code below */
1431
1432 }
1433
1434 if (!raidPtr->valid)
1435 return (EINVAL);
1436
1437 /*
1438 * Add support for "regular" device ioctls here.
1439 */
1440
1441 switch (cmd) {
1442 case DIOCGDINFO:
1443 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1444 break;
1445
1446 case DIOCGPART:
1447 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1448 ((struct partinfo *) data)->part =
1449 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1450 break;
1451
1452 case DIOCWDINFO:
1453 case DIOCSDINFO:
1454 if ((error = raidlock(rs)) != 0)
1455 return (error);
1456
1457 rs->sc_flags |= RAIDF_LABELLING;
1458
1459 error = setdisklabel(rs->sc_dkdev.dk_label,
1460 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1461 if (error == 0) {
1462 if (cmd == DIOCWDINFO)
1463 error = writedisklabel(RAIDLABELDEV(dev),
1464 raidstrategy, rs->sc_dkdev.dk_label,
1465 rs->sc_dkdev.dk_cpulabel);
1466 }
1467 rs->sc_flags &= ~RAIDF_LABELLING;
1468
1469 raidunlock(rs);
1470
1471 if (error)
1472 return (error);
1473 break;
1474
1475 case DIOCWLABEL:
1476 if (*(int *) data != 0)
1477 rs->sc_flags |= RAIDF_WLABEL;
1478 else
1479 rs->sc_flags &= ~RAIDF_WLABEL;
1480 break;
1481
1482 case DIOCGDEFLABEL:
1483 raidgetdefaultlabel(raidPtr, rs,
1484 (struct disklabel *) data);
1485 break;
1486
1487 default:
1488 retcode = ENOTTY;
1489 }
1490 return (retcode);
1491
1492 }
1493
1494
1495 /* raidinit -- complete the rest of the initialization for the
1496 RAIDframe device. */
1497
1498
1499 static void
1500 raidinit(raidPtr)
1501 RF_Raid_t *raidPtr;
1502 {
1503 struct raid_softc *rs;
1504 int unit;
1505
1506 unit = raidPtr->raidid;
1507
1508 rs = &raid_softc[unit];
1509 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1510 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1511
1512
1513 /* XXX should check return code first... */
1514 rs->sc_flags |= RAIDF_INITED;
1515
1516 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1517
1518 rs->sc_dkdev.dk_name = rs->sc_xname;
1519
1520 /* disk_attach actually creates space for the CPU disklabel, among
1521 * other things, so it's critical to call this *BEFORE* we try putzing
1522 * with disklabels. */
1523
1524 disk_attach(&rs->sc_dkdev);
1525
1526 /* XXX There may be a weird interaction here between this, and
1527 * protectedSectors, as used in RAIDframe. */
1528
1529 rs->sc_size = raidPtr->totalSectors;
1530
1531 }
1532
1533 /* wake up the daemon & tell it to get us a spare table
1534 * XXX
1535 * the entries in the queues should be tagged with the raidPtr
1536 * so that in the extremely rare case that two recons happen at once,
1537 * we know for which device we're requesting a spare table
1538 * XXX
1539 *
1540 * XXX This code is not currently used. GO
1541 */
1542 int
1543 rf_GetSpareTableFromDaemon(req)
1544 RF_SparetWait_t *req;
1545 {
1546 int retcode;
1547
1548 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1549 req->next = rf_sparet_wait_queue;
1550 rf_sparet_wait_queue = req;
1551 wakeup(&rf_sparet_wait_queue);
1552
1553 /* mpsleep unlocks the mutex */
1554 while (!rf_sparet_resp_queue) {
1555 tsleep(&rf_sparet_resp_queue, PRIBIO,
1556 "raidframe getsparetable", 0);
1557 }
1558 req = rf_sparet_resp_queue;
1559 rf_sparet_resp_queue = req->next;
1560 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1561
1562 retcode = req->fcol;
1563 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1564 * alloc'd */
1565 return (retcode);
1566 }
1567
1568 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1569 * bp & passes it down.
1570 * any calls originating in the kernel must use non-blocking I/O
1571 * do some extra sanity checking to return "appropriate" error values for
1572 * certain conditions (to make some standard utilities work)
1573 *
1574 * Formerly known as: rf_DoAccessKernel
1575 */
1576 void
1577 raidstart(raidPtr)
1578 RF_Raid_t *raidPtr;
1579 {
1580 RF_SectorCount_t num_blocks, pb, sum;
1581 RF_RaidAddr_t raid_addr;
1582 int retcode;
1583 struct partition *pp;
1584 daddr_t blocknum;
1585 int unit;
1586 struct raid_softc *rs;
1587 int do_async;
1588 struct buf *bp;
1589
1590 unit = raidPtr->raidid;
1591 rs = &raid_softc[unit];
1592
1593 /* quick check to see if anything has died recently */
1594 RF_LOCK_MUTEX(raidPtr->mutex);
1595 if (raidPtr->numNewFailures > 0) {
1596 rf_update_component_labels(raidPtr,
1597 RF_NORMAL_COMPONENT_UPDATE);
1598 raidPtr->numNewFailures--;
1599 }
1600 RF_UNLOCK_MUTEX(raidPtr->mutex);
1601
1602 /* Check to see if we're at the limit... */
1603 RF_LOCK_MUTEX(raidPtr->mutex);
1604 while (raidPtr->openings > 0) {
1605 RF_UNLOCK_MUTEX(raidPtr->mutex);
1606
1607 /* get the next item, if any, from the queue */
1608 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1609 /* nothing more to do */
1610 return;
1611 }
1612 BUFQ_REMOVE(&rs->buf_queue, bp);
1613
1614 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1615 * partition.. Need to make it absolute to the underlying
1616 * device.. */
1617
1618 blocknum = bp->b_blkno;
1619 if (DISKPART(bp->b_dev) != RAW_PART) {
1620 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1621 blocknum += pp->p_offset;
1622 }
1623
1624 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1625 (int) blocknum));
1626
1627 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1628 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1629
1630 /* *THIS* is where we adjust what block we're going to...
1631 * but DO NOT TOUCH bp->b_blkno!!! */
1632 raid_addr = blocknum;
1633
1634 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1635 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1636 sum = raid_addr + num_blocks + pb;
1637 if (1 || rf_debugKernelAccess) {
1638 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1639 (int) raid_addr, (int) sum, (int) num_blocks,
1640 (int) pb, (int) bp->b_resid));
1641 }
1642 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1643 || (sum < num_blocks) || (sum < pb)) {
1644 bp->b_error = ENOSPC;
1645 bp->b_flags |= B_ERROR;
1646 bp->b_resid = bp->b_bcount;
1647 biodone(bp);
1648 RF_LOCK_MUTEX(raidPtr->mutex);
1649 continue;
1650 }
1651 /*
1652 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1653 */
1654
1655 if (bp->b_bcount & raidPtr->sectorMask) {
1656 bp->b_error = EINVAL;
1657 bp->b_flags |= B_ERROR;
1658 bp->b_resid = bp->b_bcount;
1659 biodone(bp);
1660 RF_LOCK_MUTEX(raidPtr->mutex);
1661 continue;
1662
1663 }
1664 db1_printf(("Calling DoAccess..\n"));
1665
1666
1667 RF_LOCK_MUTEX(raidPtr->mutex);
1668 raidPtr->openings--;
1669 RF_UNLOCK_MUTEX(raidPtr->mutex);
1670
1671 /*
1672 * Everything is async.
1673 */
1674 do_async = 1;
1675
1676 disk_busy(&rs->sc_dkdev);
1677
1678 /* XXX we're still at splbio() here... do we *really*
1679 need to be? */
1680
1681 /* don't ever condition on bp->b_flags & B_WRITE.
1682 * always condition on B_READ instead */
1683
1684 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1685 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1686 do_async, raid_addr, num_blocks,
1687 bp->b_data, bp, NULL, NULL,
1688 RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);
1689
1690
1691 RF_LOCK_MUTEX(raidPtr->mutex);
1692 }
1693 RF_UNLOCK_MUTEX(raidPtr->mutex);
1694 }
1695
1696
1697
1698
1699 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1700
1701 int
1702 rf_DispatchKernelIO(queue, req)
1703 RF_DiskQueue_t *queue;
1704 RF_DiskQueueData_t *req;
1705 {
1706 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1707 struct buf *bp;
1708 struct raidbuf *raidbp = NULL;
1709 struct raid_softc *rs;
1710 int unit;
1711 int s;
1712
1713 s=0;
1714 /* s = splbio();*/ /* want to test this */
1715 /* XXX along with the vnode, we also need the softc associated with
1716 * this device.. */
1717
1718 req->queue = queue;
1719
1720 unit = queue->raidPtr->raidid;
1721
1722 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1723
1724 if (unit >= numraid) {
1725 printf("Invalid unit number: %d %d\n", unit, numraid);
1726 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1727 }
1728 rs = &raid_softc[unit];
1729
1730 bp = req->bp;
1731 #if 1
1732 /* XXX when there is a physical disk failure, someone is passing us a
1733 * buffer that contains old stuff!! Attempt to deal with this problem
1734 * without taking a performance hit... (not sure where the real bug
1735 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1736
1737 if (bp->b_flags & B_ERROR) {
1738 bp->b_flags &= ~B_ERROR;
1739 }
1740 if (bp->b_error != 0) {
1741 bp->b_error = 0;
1742 }
1743 #endif
1744 raidbp = RAIDGETBUF(rs);
1745
1746 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1747
1748 /*
1749 * context for raidiodone
1750 */
1751 raidbp->rf_obp = bp;
1752 raidbp->req = req;
1753
1754 LIST_INIT(&raidbp->rf_buf.b_dep);
1755
1756 switch (req->type) {
1757 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1758 /* XXX need to do something extra here.. */
1759 /* I'm leaving this in, as I've never actually seen it used,
1760 * and I'd like folks to report it... GO */
1761 printf(("WAKEUP CALLED\n"));
1762 queue->numOutstanding++;
1763
1764 /* XXX need to glue the original buffer into this?? */
1765
1766 KernelWakeupFunc(&raidbp->rf_buf);
1767 break;
1768
1769 case RF_IO_TYPE_READ:
1770 case RF_IO_TYPE_WRITE:
1771
1772 if (req->tracerec) {
1773 RF_ETIMER_START(req->tracerec->timer);
1774 }
1775 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1776 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1777 req->sectorOffset, req->numSector,
1778 req->buf, KernelWakeupFunc, (void *) req,
1779 queue->raidPtr->logBytesPerSector, req->b_proc);
1780
1781 if (rf_debugKernelAccess) {
1782 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1783 (long) bp->b_blkno));
1784 }
1785 queue->numOutstanding++;
1786 queue->last_deq_sector = req->sectorOffset;
1787 /* acc wouldn't have been let in if there were any pending
1788 * reqs at any other priority */
1789 queue->curPriority = req->priority;
1790
1791 db1_printf(("Going for %c to unit %d row %d col %d\n",
1792 req->type, unit, queue->row, queue->col));
1793 db1_printf(("sector %d count %d (%d bytes) %d\n",
1794 (int) req->sectorOffset, (int) req->numSector,
1795 (int) (req->numSector <<
1796 queue->raidPtr->logBytesPerSector),
1797 (int) queue->raidPtr->logBytesPerSector));
1798 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1799 raidbp->rf_buf.b_vp->v_numoutput++;
1800 }
1801 VOP_STRATEGY(&raidbp->rf_buf);
1802
1803 break;
1804
1805 default:
1806 panic("bad req->type in rf_DispatchKernelIO");
1807 }
1808 db1_printf(("Exiting from DispatchKernelIO\n"));
1809 /* splx(s); */ /* want to test this */
1810 return (0);
1811 }
1812 /* this is the callback function associated with an I/O invoked from
1813 kernel code.
1814 */
1815 static void
1816 KernelWakeupFunc(vbp)
1817 struct buf *vbp;
1818 {
1819 RF_DiskQueueData_t *req = NULL;
1820 RF_DiskQueue_t *queue;
1821 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1822 struct buf *bp;
1823 struct raid_softc *rs;
1824 int unit;
1825 int s;
1826
1827 s = splbio();
1828 db1_printf(("recovering the request queue:\n"));
1829 req = raidbp->req;
1830
1831 bp = raidbp->rf_obp;
1832
1833 queue = (RF_DiskQueue_t *) req->queue;
1834
1835 if (raidbp->rf_buf.b_flags & B_ERROR) {
1836 bp->b_flags |= B_ERROR;
1837 bp->b_error = raidbp->rf_buf.b_error ?
1838 raidbp->rf_buf.b_error : EIO;
1839 }
1840
1841 /* XXX methinks this could be wrong... */
1842 #if 1
1843 bp->b_resid = raidbp->rf_buf.b_resid;
1844 #endif
1845
1846 if (req->tracerec) {
1847 RF_ETIMER_STOP(req->tracerec->timer);
1848 RF_ETIMER_EVAL(req->tracerec->timer);
1849 RF_LOCK_MUTEX(rf_tracing_mutex);
1850 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1851 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1852 req->tracerec->num_phys_ios++;
1853 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1854 }
1855 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1856
1857 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1858
1859
1860 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1861 * ballistic, and mark the component as hosed... */
1862
1863 if (bp->b_flags & B_ERROR) {
1864 /* Mark the disk as dead */
1865 /* but only mark it once... */
1866 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1867 rf_ds_optimal) {
1868 printf("raid%d: IO Error. Marking %s as failed.\n",
1869 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1870 queue->raidPtr->Disks[queue->row][queue->col].status =
1871 rf_ds_failed;
1872 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1873 queue->raidPtr->numFailures++;
1874 queue->raidPtr->numNewFailures++;
1875 } else { /* Disk is already dead... */
1876 /* printf("Disk already marked as dead!\n"); */
1877 }
1878
1879 }
1880
1881 rs = &raid_softc[unit];
1882 RAIDPUTBUF(rs, raidbp);
1883
1884 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1885 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1886
1887 splx(s);
1888 }
1889
1890
1891
1892 /*
1893 * initialize a buf structure for doing an I/O in the kernel.
1894 */
1895 static void
1896 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1897 logBytesPerSector, b_proc)
1898 struct buf *bp;
1899 struct vnode *b_vp;
1900 unsigned rw_flag;
1901 dev_t dev;
1902 RF_SectorNum_t startSect;
1903 RF_SectorCount_t numSect;
1904 caddr_t buf;
1905 void (*cbFunc) (struct buf *);
1906 void *cbArg;
1907 int logBytesPerSector;
1908 struct proc *b_proc;
1909 {
1910 /* bp->b_flags = B_PHYS | rw_flag; */
1911 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1912 bp->b_bcount = numSect << logBytesPerSector;
1913 bp->b_bufsize = bp->b_bcount;
1914 bp->b_error = 0;
1915 bp->b_dev = dev;
1916 bp->b_data = buf;
1917 bp->b_blkno = startSect;
1918 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1919 if (bp->b_bcount == 0) {
1920 panic("bp->b_bcount is zero in InitBP!!\n");
1921 }
1922 bp->b_proc = b_proc;
1923 bp->b_iodone = cbFunc;
1924 bp->b_vp = b_vp;
1925
1926 }
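/*
 * A worked example of the arithmetic above (just a sketch): with
 * 512-byte sectors, logBytesPerSector is 9, so a 32-sector transfer
 * gives b_bcount = 32 << 9 = 16384 bytes.  b_blkno is the starting
 * sector number on the component, not a byte offset.
 */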
1927
1928 static void
1929 raidgetdefaultlabel(raidPtr, rs, lp)
1930 RF_Raid_t *raidPtr;
1931 struct raid_softc *rs;
1932 struct disklabel *lp;
1933 {
1934 db1_printf(("Building a default label...\n"));
1935 bzero(lp, sizeof(*lp));
1936
1937 /* fabricate a label... */
1938 lp->d_secperunit = raidPtr->totalSectors;
1939 lp->d_secsize = raidPtr->bytesPerSector;
1940 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1941 lp->d_ntracks = 1;
1942 lp->d_ncylinders = raidPtr->totalSectors /
1943 (lp->d_nsectors * lp->d_ntracks);
1944 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1945
1946 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1947 lp->d_type = DTYPE_RAID;
1948 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1949 lp->d_rpm = 3600;
1950 lp->d_interleave = 1;
1951 lp->d_flags = 0;
1952
1953 lp->d_partitions[RAW_PART].p_offset = 0;
1954 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1955 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1956 lp->d_npartitions = RAW_PART + 1;
1957
1958 lp->d_magic = DISKMAGIC;
1959 lp->d_magic2 = DISKMAGIC;
1960 	lp->d_checksum = dkcksum(lp);
1961
1962 }
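/*
 * The geometry fabricated above is synthetic: one track per cylinder,
 * dataSectorsPerStripe sectors per track, and d_ncylinders derived as
 * totalSectors / d_secpercyl.  It only needs to be self-consistent;
 * nothing requires it to describe real hardware.
 */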
1963 /*
1964 * Read the disklabel from the raid device. If one is not present, fake one
1965 * up.
1966 */
1967 static void
1968 raidgetdisklabel(dev)
1969 dev_t dev;
1970 {
1971 int unit = raidunit(dev);
1972 struct raid_softc *rs = &raid_softc[unit];
1973 char *errstring;
1974 struct disklabel *lp = rs->sc_dkdev.dk_label;
1975 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1976 RF_Raid_t *raidPtr;
1977
1978 db1_printf(("Getting the disklabel...\n"));
1979
1980 bzero(clp, sizeof(*clp));
1981
1982 raidPtr = raidPtrs[unit];
1983
1984 raidgetdefaultlabel(raidPtr, rs, lp);
1985
1986 /*
1987 * Call the generic disklabel extraction routine.
1988 */
1989 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1990 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1991 if (errstring)
1992 raidmakedisklabel(rs);
1993 else {
1994 int i;
1995 struct partition *pp;
1996
1997 /*
1998 * Sanity check whether the found disklabel is valid.
1999 *
2000 	 * This is necessary since the total size of the raid device
2001 	 * may vary when the interleave is changed, even though exactly
2002 	 * the same components are used, and an old disklabel may be
2003 	 * used if one is found.
2004 */
2005 if (lp->d_secperunit != rs->sc_size)
2006 printf("WARNING: %s: "
2007 "total sector size in disklabel (%d) != "
2008 "the size of raid (%ld)\n", rs->sc_xname,
2009 lp->d_secperunit, (long) rs->sc_size);
2010 for (i = 0; i < lp->d_npartitions; i++) {
2011 pp = &lp->d_partitions[i];
2012 if (pp->p_offset + pp->p_size > rs->sc_size)
2013 printf("WARNING: %s: end of partition `%c' "
2014 "exceeds the size of raid (%ld)\n",
2015 rs->sc_xname, 'a' + i, (long) rs->sc_size);
2016 }
2017 }
2018
2019 }
2020 /*
2021 * Take care of things one might want to take care of in the event
2022 * that a disklabel isn't present.
2023 */
2024 static void
2025 raidmakedisklabel(rs)
2026 struct raid_softc *rs;
2027 {
2028 struct disklabel *lp = rs->sc_dkdev.dk_label;
2029 db1_printf(("Making a label..\n"));
2030
2031 /*
2032 * For historical reasons, if there's no disklabel present
2033 * the raw partition must be marked FS_BSDFFS.
2034 */
2035
2036 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2037
2038 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2039
2040 lp->d_checksum = dkcksum(lp);
2041 }
2042 /*
2043 * Lookup the provided name in the filesystem. If the file exists,
2044 * is a valid block device, and isn't being used by anyone else,
2045 * set *vpp to the file's vnode.
2046 * You'll find the original of this in ccd.c
2047 */
2048 int
2049 raidlookup(path, p, vpp)
2050 char *path;
2051 struct proc *p;
2052 struct vnode **vpp; /* result */
2053 {
2054 struct nameidata nd;
2055 struct vnode *vp;
2056 struct vattr va;
2057 int error;
2058
2059 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2060 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
2061 #ifdef DEBUG
2062 printf("RAIDframe: vn_open returned %d\n", error);
2063 #endif
2064 return (error);
2065 }
2066 vp = nd.ni_vp;
2067 if (vp->v_usecount > 1) {
2068 VOP_UNLOCK(vp, 0);
2069 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2070 return (EBUSY);
2071 }
2072 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2073 VOP_UNLOCK(vp, 0);
2074 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2075 return (error);
2076 }
2077 /* XXX: eventually we should handle VREG, too. */
2078 if (va.va_type != VBLK) {
2079 VOP_UNLOCK(vp, 0);
2080 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2081 return (ENOTBLK);
2082 }
2083 VOP_UNLOCK(vp, 0);
2084 *vpp = vp;
2085 return (0);
2086 }
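/*
 * Note that vn_open() returns with the vnode locked, which is why every
 * exit path above, error or not, does a VOP_UNLOCK() first.
 */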
2087 /*
2088 * Wait interruptibly for an exclusive lock.
2089 *
2090 * XXX
2091 * Several drivers do this; it should be abstracted and made MP-safe.
2092 * (Hmm... where have we seen this warning before :-> GO )
2093 */
2094 static int
2095 raidlock(rs)
2096 struct raid_softc *rs;
2097 {
2098 int error;
2099
2100 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2101 rs->sc_flags |= RAIDF_WANTED;
2102 if ((error =
2103 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2104 return (error);
2105 }
2106 rs->sc_flags |= RAIDF_LOCKED;
2107 return (0);
2108 }
2109 /*
2110 * Unlock and wake up any waiters.
2111 */
2112 static void
2113 raidunlock(rs)
2114 struct raid_softc *rs;
2115 {
2116
2117 rs->sc_flags &= ~RAIDF_LOCKED;
2118 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2119 rs->sc_flags &= ~RAIDF_WANTED;
2120 wakeup(rs);
2121 }
2122 }
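/*
 * Typical use of the pair above (a sketch only, not lifted from any
 * particular caller):
 *
 *	if ((error = raidlock(rs)) != 0)
 *		return (error);
 *	... poke at the softc ...
 *	raidunlock(rs);
 */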
2123
2124
2125 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2126 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2127
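/*
 * The component label lives RF_COMPONENT_INFO_OFFSET bytes into each
 * component, i.e. at sector RF_COMPONENT_INFO_OFFSET / DEV_BSIZE
 * (sector 32 with a 512-byte DEV_BSIZE), and the routines below always
 * transfer a full RF_COMPONENT_INFO_SIZE bytes even though only
 * sizeof(RF_ComponentLabel_t) of it is meaningful.
 */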
2128 int
2129 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2130 {
2131 RF_ComponentLabel_t clabel;
2132 raidread_component_label(dev, b_vp, &clabel);
2133 clabel.mod_counter = mod_counter;
2134 clabel.clean = RF_RAID_CLEAN;
2135 raidwrite_component_label(dev, b_vp, &clabel);
2136 return(0);
2137 }
2138
2139
2140 int
2141 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2142 {
2143 RF_ComponentLabel_t clabel;
2144 raidread_component_label(dev, b_vp, &clabel);
2145 clabel.mod_counter = mod_counter;
2146 clabel.clean = RF_RAID_DIRTY;
2147 raidwrite_component_label(dev, b_vp, &clabel);
2148 return(0);
2149 }
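/*
 * raidmarkclean() and raidmarkdirty() are simple read-modify-write
 * wrappers: fetch the on-disk component label, update only mod_counter
 * and the clean flag, and write it straight back.
 */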
2150
2151 /* ARGSUSED */
2152 int
2153 raidread_component_label(dev, b_vp, clabel)
2154 dev_t dev;
2155 struct vnode *b_vp;
2156 RF_ComponentLabel_t *clabel;
2157 {
2158 struct buf *bp;
2159 int error;
2160
2161 /* XXX should probably ensure that we don't try to do this if
2162 someone has changed rf_protected_sectors. */
2163
2164 if (b_vp == NULL) {
2165 /* For whatever reason, this component is not valid.
2166 Don't try to read a component label from it. */
2167 return(EINVAL);
2168 }
2169
2170 /* get a block of the appropriate size... */
2171 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2172 bp->b_dev = dev;
2173
2174 /* get our ducks in a row for the read */
2175 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2176 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2177 bp->b_flags |= B_READ;
2178 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2179
2180 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2181
2182 error = biowait(bp);
2183
2184 if (!error) {
2185 memcpy(clabel, bp->b_data,
2186 sizeof(RF_ComponentLabel_t));
2187 #if 0
2188 rf_print_component_label( clabel );
2189 #endif
2190 } else {
2191 #if 0
2192 printf("Failed to read RAID component label!\n");
2193 #endif
2194 }
2195
2196 brelse(bp);
2197 return(error);
2198 }
2199 /* ARGSUSED */
2200 int
2201 raidwrite_component_label(dev, b_vp, clabel)
2202 dev_t dev;
2203 struct vnode *b_vp;
2204 RF_ComponentLabel_t *clabel;
2205 {
2206 struct buf *bp;
2207 int error;
2208
2209 /* get a block of the appropriate size... */
2210 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2211 bp->b_dev = dev;
2212
2213 /* get our ducks in a row for the write */
2214 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2215 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2216 bp->b_flags |= B_WRITE;
2217 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2218
2219 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
2220
2221 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
2222
2223 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2224 error = biowait(bp);
2225 brelse(bp);
2226 if (error) {
2227 #if 1
2228 printf("Failed to write RAID component info!\n");
2229 #endif
2230 }
2231
2232 return(error);
2233 }
2234
2235 void
2236 rf_markalldirty(raidPtr)
2237 RF_Raid_t *raidPtr;
2238 {
2239 RF_ComponentLabel_t clabel;
2240 int r,c;
2241
2242 raidPtr->mod_counter++;
2243 for (r = 0; r < raidPtr->numRow; r++) {
2244 for (c = 0; c < raidPtr->numCol; c++) {
2245 /* we don't want to touch (at all) a disk that has
2246 failed */
2247 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
2248 raidread_component_label(
2249 raidPtr->Disks[r][c].dev,
2250 raidPtr->raid_cinfo[r][c].ci_vp,
2251 &clabel);
2252 if (clabel.status == rf_ds_spared) {
2253 /* XXX do something special...
2254 but whatever you do, don't
2255 try to access it!! */
2256 } else {
2257 #if 0
2258 clabel.status =
2259 raidPtr->Disks[r][c].status;
2260 raidwrite_component_label(
2261 raidPtr->Disks[r][c].dev,
2262 raidPtr->raid_cinfo[r][c].ci_vp,
2263 &clabel);
2264 #endif
2265 raidmarkdirty(
2266 raidPtr->Disks[r][c].dev,
2267 raidPtr->raid_cinfo[r][c].ci_vp,
2268 raidPtr->mod_counter);
2269 }
2270 }
2271 }
2272 }
2273 /* printf("Component labels marked dirty.\n"); */
2274 #if 0
2275 for( c = 0; c < raidPtr->numSpare ; c++) {
2276 sparecol = raidPtr->numCol + c;
2277 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2278 /*
2279
2280 XXX this is where we get fancy and map this spare
2281 		   into its correct spot in the array.
2282
2283 */
2284 /*
2285
2286 we claim this disk is "optimal" if it's
2287 rf_ds_used_spare, as that means it should be
2288 directly substitutable for the disk it replaced.
2289 We note that too...
2290
2291 */
2292
2293 for(i=0;i<raidPtr->numRow;i++) {
2294 for(j=0;j<raidPtr->numCol;j++) {
2295 if ((raidPtr->Disks[i][j].spareRow ==
2296 r) &&
2297 (raidPtr->Disks[i][j].spareCol ==
2298 sparecol)) {
2299 srow = r;
2300 scol = sparecol;
2301 break;
2302 }
2303 }
2304 }
2305
2306 raidread_component_label(
2307 raidPtr->Disks[r][sparecol].dev,
2308 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2309 &clabel);
2310 /* make sure status is noted */
2311 clabel.version = RF_COMPONENT_LABEL_VERSION;
2312 clabel.mod_counter = raidPtr->mod_counter;
2313 clabel.serial_number = raidPtr->serial_number;
2314 clabel.row = srow;
2315 clabel.column = scol;
2316 clabel.num_rows = raidPtr->numRow;
2317 clabel.num_columns = raidPtr->numCol;
2318 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2319 clabel.status = rf_ds_optimal;
2320 raidwrite_component_label(
2321 raidPtr->Disks[r][sparecol].dev,
2322 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2323 &clabel);
2324 			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2325 			    raidPtr->raid_cinfo[r][sparecol].ci_vp, raidPtr->mod_counter);
2326 }
2327 }
2328
2329 #endif
2330 }
2331
2332
2333 void
2334 rf_update_component_labels(raidPtr, final)
2335 RF_Raid_t *raidPtr;
2336 int final;
2337 {
2338 RF_ComponentLabel_t clabel;
2339 int sparecol;
2340 int r,c;
2341 int i,j;
2342 int srow, scol;
2343
2344 srow = -1;
2345 scol = -1;
2346
2347 /* XXX should do extra checks to make sure things really are clean,
2348 rather than blindly setting the clean bit... */
2349
2350 raidPtr->mod_counter++;
2351
2352 for (r = 0; r < raidPtr->numRow; r++) {
2353 for (c = 0; c < raidPtr->numCol; c++) {
2354 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2355 raidread_component_label(
2356 raidPtr->Disks[r][c].dev,
2357 raidPtr->raid_cinfo[r][c].ci_vp,
2358 &clabel);
2359 /* make sure status is noted */
2360 clabel.status = rf_ds_optimal;
2361 /* bump the counter */
2362 clabel.mod_counter = raidPtr->mod_counter;
2363
2364 raidwrite_component_label(
2365 raidPtr->Disks[r][c].dev,
2366 raidPtr->raid_cinfo[r][c].ci_vp,
2367 &clabel);
2368 if (final == RF_FINAL_COMPONENT_UPDATE) {
2369 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2370 raidmarkclean(
2371 raidPtr->Disks[r][c].dev,
2372 raidPtr->raid_cinfo[r][c].ci_vp,
2373 raidPtr->mod_counter);
2374 }
2375 }
2376 }
2377 /* else we don't touch it.. */
2378 }
2379 }
2380
2381 for( c = 0; c < raidPtr->numSpare ; c++) {
2382 sparecol = raidPtr->numCol + c;
2383 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2384 /*
2385
2386 we claim this disk is "optimal" if it's
2387 rf_ds_used_spare, as that means it should be
2388 directly substitutable for the disk it replaced.
2389 We note that too...
2390
2391 */
2392
2393 for(i=0;i<raidPtr->numRow;i++) {
2394 for(j=0;j<raidPtr->numCol;j++) {
2395 if ((raidPtr->Disks[i][j].spareRow ==
2396 0) &&
2397 (raidPtr->Disks[i][j].spareCol ==
2398 sparecol)) {
2399 srow = i;
2400 scol = j;
2401 break;
2402 }
2403 }
2404 }
2405
2406 /* XXX shouldn't *really* need this... */
2407 raidread_component_label(
2408 raidPtr->Disks[0][sparecol].dev,
2409 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2410 &clabel);
2411 /* make sure status is noted */
2412
2413 raid_init_component_label(raidPtr, &clabel);
2414
2415 clabel.mod_counter = raidPtr->mod_counter;
2416 clabel.row = srow;
2417 clabel.column = scol;
2418 clabel.status = rf_ds_optimal;
2419
2420 raidwrite_component_label(
2421 raidPtr->Disks[0][sparecol].dev,
2422 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2423 &clabel);
2424 if (final == RF_FINAL_COMPONENT_UPDATE) {
2425 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2426 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2427 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2428 raidPtr->mod_counter);
2429 }
2430 }
2431 }
2432 }
2433 /* printf("Component labels updated\n"); */
2434 }
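/*
 * In short: every call above bumps mod_counter and rewrites it to all
 * optimal components and all in-use spares (a spare's label gets the
 * row/column of the disk it now stands in for), and the clean bit is
 * only set on the final update at shutdown (RF_FINAL_COMPONENT_UPDATE)
 * when the parity is known to be good.
 */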
2435
2436 void
2437 rf_close_component(raidPtr, vp, auto_configured)
2438 RF_Raid_t *raidPtr;
2439 struct vnode *vp;
2440 int auto_configured;
2441 {
2442 struct proc *p;
2443
2444 p = raidPtr->engine_thread;
2445
2446 if (vp != NULL) {
2447 if (auto_configured == 1) {
2448 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2449 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2450 vput(vp);
2451
2452 } else {
2453 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2454 }
2455 } else {
2456 printf("vnode was NULL\n");
2457 }
2458 }
2459
2460
2461 void
2462 rf_UnconfigureVnodes(raidPtr)
2463 RF_Raid_t *raidPtr;
2464 {
2465 int r,c;
2466 struct proc *p;
2467 struct vnode *vp;
2468 int acd;
2469
2470
2471 /* We take this opportunity to close the vnodes like we should.. */
2472
2473 p = raidPtr->engine_thread;
2474
2475 for (r = 0; r < raidPtr->numRow; r++) {
2476 for (c = 0; c < raidPtr->numCol; c++) {
2477 printf("Closing vnode for row: %d col: %d\n", r, c);
2478 vp = raidPtr->raid_cinfo[r][c].ci_vp;
2479 acd = raidPtr->Disks[r][c].auto_configured;
2480 rf_close_component(raidPtr, vp, acd);
2481 raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2482 raidPtr->Disks[r][c].auto_configured = 0;
2483 }
2484 }
2485 for (r = 0; r < raidPtr->numSpare; r++) {
2486 printf("Closing vnode for spare: %d\n", r);
2487 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2488 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2489 rf_close_component(raidPtr, vp, acd);
2490 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2491 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2492 }
2493 }
2494
2495
2496 void
2497 rf_ReconThread(req)
2498 struct rf_recon_req *req;
2499 {
2500 int s;
2501 RF_Raid_t *raidPtr;
2502
2503 s = splbio();
2504 raidPtr = (RF_Raid_t *) req->raidPtr;
2505 raidPtr->recon_in_progress = 1;
2506
2507 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2508 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2509
2510 /* XXX get rid of this! we don't need it at all.. */
2511 RF_Free(req, sizeof(*req));
2512
2513 raidPtr->recon_in_progress = 0;
2514 splx(s);
2515
2516 /* That's all... */
2517 kthread_exit(0); /* does not return */
2518 }
2519
2520 void
2521 rf_RewriteParityThread(raidPtr)
2522 RF_Raid_t *raidPtr;
2523 {
2524 int retcode;
2525 int s;
2526
2527 raidPtr->parity_rewrite_in_progress = 1;
2528 s = splbio();
2529 retcode = rf_RewriteParity(raidPtr);
2530 splx(s);
2531 if (retcode) {
2532 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2533 } else {
2534 /* set the clean bit! If we shutdown correctly,
2535 the clean bit on each component label will get
2536 set */
2537 raidPtr->parity_good = RF_RAID_CLEAN;
2538 }
2539 raidPtr->parity_rewrite_in_progress = 0;
2540
2541 /* Anyone waiting for us to stop? If so, inform them... */
2542 if (raidPtr->waitShutdown) {
2543 wakeup(&raidPtr->parity_rewrite_in_progress);
2544 }
2545
2546 /* That's all... */
2547 kthread_exit(0); /* does not return */
2548 }
2549
2550
2551 void
2552 rf_CopybackThread(raidPtr)
2553 RF_Raid_t *raidPtr;
2554 {
2555 int s;
2556
2557 raidPtr->copyback_in_progress = 1;
2558 s = splbio();
2559 rf_CopybackReconstructedData(raidPtr);
2560 splx(s);
2561 raidPtr->copyback_in_progress = 0;
2562
2563 /* That's all... */
2564 kthread_exit(0); /* does not return */
2565 }
2566
2567
2568 void
2569 rf_ReconstructInPlaceThread(req)
2570 struct rf_recon_req *req;
2571 {
2572 int retcode;
2573 int s;
2574 RF_Raid_t *raidPtr;
2575
2576 s = splbio();
2577 raidPtr = req->raidPtr;
2578 raidPtr->recon_in_progress = 1;
2579 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2580 RF_Free(req, sizeof(*req));
2581 raidPtr->recon_in_progress = 0;
2582 splx(s);
2583
2584 /* That's all... */
2585 kthread_exit(0); /* does not return */
2586 }
2587
2588 void
2589 rf_mountroot_hook(dev)
2590 struct device *dev;
2591 {
2592
2593 }
2594
2595
2596 RF_AutoConfig_t *
2597 rf_find_raid_components()
2598 {
2599 struct devnametobdevmaj *dtobdm;
2600 struct vnode *vp;
2601 struct disklabel label;
2602 struct device *dv;
2603 char *cd_name;
2604 dev_t dev;
2605 int error;
2606 int i;
2607 int good_one;
2608 RF_ComponentLabel_t *clabel;
2609 RF_AutoConfig_t *ac_list;
2610 RF_AutoConfig_t *ac;
2611
2612
2613 /* initialize the AutoConfig list */
2614 ac_list = NULL;
2615
2616 if (raidautoconfig) {
2617
2618 /* we begin by trolling through *all* the devices on the system */
2619
2620 for (dv = alldevs.tqh_first; dv != NULL;
2621 dv = dv->dv_list.tqe_next) {
2622
2623 /* we are only interested in disks... */
2624 if (dv->dv_class != DV_DISK)
2625 continue;
2626
2627 /* we don't care about floppies... */
2628 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
2629 continue;
2630 }
2631
2632 /* need to find the device_name_to_block_device_major stuff */
2633 cd_name = dv->dv_cfdata->cf_driver->cd_name;
2634 dtobdm = dev_name2blk;
2635 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) {
2636 dtobdm++;
2637 }
2638
2639 /* get a vnode for the raw partition of this disk */
2640
2641 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART);
2642 if (bdevvp(dev, &vp))
2643 panic("RAID can't alloc vnode");
2644
2645 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2646
2647 if (error) {
2648 /* "Who cares." Continue looking
2649 			   for something that exists */
2650 vput(vp);
2651 continue;
2652 }
2653
2654 /* Ok, the disk exists. Go get the disklabel. */
2655 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2656 FREAD, NOCRED, 0);
2657 if (error) {
2658 /*
2659 * XXX can't happen - open() would
2660 * have errored out (or faked up one)
2661 */
2662 printf("can't get label for dev %s%c (%d)!?!?\n",
2663 dv->dv_xname, 'a' + RAW_PART, error);
2664 }
2665
2666 /* don't need this any more. We'll allocate it again
2667 a little later if we really do... */
2668 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2669 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2670 vput(vp);
2671
2672 for (i=0; i < label.d_npartitions; i++) {
2673 /* We only support partitions marked as RAID */
2674 if (label.d_partitions[i].p_fstype != FS_RAID)
2675 continue;
2676
2677 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i);
2678 if (bdevvp(dev, &vp))
2679 panic("RAID can't alloc vnode");
2680
2681 error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2682 if (error) {
2683 /* Whatever... */
2684 vput(vp);
2685 continue;
2686 }
2687
2688 good_one = 0;
2689
2690 clabel = (RF_ComponentLabel_t *)
2691 malloc(sizeof(RF_ComponentLabel_t),
2692 M_RAIDFRAME, M_NOWAIT);
2693 if (clabel == NULL) {
2694 /* XXX CLEANUP HERE */
2695 printf("RAID auto config: out of memory!\n");
2696 return(NULL); /* XXX probably should panic? */
2697 }
2698
2699 if (!raidread_component_label(dev, vp, clabel)) {
2700 /* Got the label. Does it look reasonable? */
2701 if (rf_reasonable_label(clabel) &&
2702 (clabel->partitionSize <=
2703 label.d_partitions[i].p_size)) {
2704 #if DEBUG
2705 printf("Component on: %s%c: %d\n",
2706 dv->dv_xname, 'a'+i,
2707 label.d_partitions[i].p_size);
2708 rf_print_component_label(clabel);
2709 #endif
2710 /* if it's reasonable, add it,
2711 else ignore it. */
2712 ac = (RF_AutoConfig_t *)
2713 malloc(sizeof(RF_AutoConfig_t),
2714 M_RAIDFRAME,
2715 M_NOWAIT);
2716 if (ac == NULL) {
2717 /* XXX should panic?? */
2718 return(NULL);
2719 }
2720
2721 sprintf(ac->devname, "%s%c",
2722 dv->dv_xname, 'a'+i);
2723 ac->dev = dev;
2724 ac->vp = vp;
2725 ac->clabel = clabel;
2726 ac->next = ac_list;
2727 ac_list = ac;
2728 good_one = 1;
2729 }
2730 }
2731 if (!good_one) {
2732 /* cleanup */
2733 free(clabel, M_RAIDFRAME);
2734 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2735 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
2736 vput(vp);
2737 }
2738 }
2739 }
2740 }
2741 return(ac_list);
2742 }
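/*
 * Summing up the scan above: walk every disk-class device in alldevs,
 * skip floppies, open the raw partition to read the disklabel, and for
 * each partition marked FS_RAID try to read a component label.
 * Components whose labels pass rf_reasonable_label() (and fit inside
 * their partition) stay open and are collected on the returned
 * RF_AutoConfig_t list; everything else is closed and ignored.
 */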
2743
2744 static int
2745 rf_reasonable_label(clabel)
2746 RF_ComponentLabel_t *clabel;
2747 {
2748
2749 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2750 (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2751 ((clabel->clean == RF_RAID_CLEAN) ||
2752 (clabel->clean == RF_RAID_DIRTY)) &&
2753 clabel->row >=0 &&
2754 clabel->column >= 0 &&
2755 clabel->num_rows > 0 &&
2756 clabel->num_columns > 0 &&
2757 clabel->row < clabel->num_rows &&
2758 clabel->column < clabel->num_columns &&
2759 clabel->blockSize > 0 &&
2760 clabel->numBlocks > 0) {
2761 /* label looks reasonable enough... */
2762 return(1);
2763 }
2764 return(0);
2765 }
2766
2767
2768 void
2769 rf_print_component_label(clabel)
2770 RF_ComponentLabel_t *clabel;
2771 {
2772 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2773 clabel->row, clabel->column,
2774 clabel->num_rows, clabel->num_columns);
2775 printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2776 clabel->version, clabel->serial_number,
2777 clabel->mod_counter);
2778 printf(" Clean: %s Status: %d\n",
2779 clabel->clean ? "Yes" : "No", clabel->status );
2780 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2781 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2782 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2783 (char) clabel->parityConfig, clabel->blockSize,
2784 clabel->numBlocks);
2785 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
2786 printf(" Contains root partition: %s\n",
2787 clabel->root_partition ? "Yes" : "No" );
2788 printf(" Last configured as: raid%d\n", clabel->last_unit );
2789 #if 0
2790 printf(" Config order: %d\n", clabel->config_order);
2791 #endif
2792
2793 }
2794
2795 RF_ConfigSet_t *
2796 rf_create_auto_sets(ac_list)
2797 RF_AutoConfig_t *ac_list;
2798 {
2799 RF_AutoConfig_t *ac;
2800 RF_ConfigSet_t *config_sets;
2801 RF_ConfigSet_t *cset;
2802 RF_AutoConfig_t *ac_next;
2803
2804
2805 config_sets = NULL;
2806
2807 /* Go through the AutoConfig list, and figure out which components
2808 belong to what sets. */
2809 ac = ac_list;
2810 while(ac!=NULL) {
2811 /* we're going to putz with ac->next, so save it here
2812 for use at the end of the loop */
2813 ac_next = ac->next;
2814
2815 if (config_sets == NULL) {
2816 /* will need at least this one... */
2817 config_sets = (RF_ConfigSet_t *)
2818 malloc(sizeof(RF_ConfigSet_t),
2819 M_RAIDFRAME, M_NOWAIT);
2820 if (config_sets == NULL) {
2821 panic("rf_create_auto_sets: No memory!\n");
2822 }
2823 /* this one is easy :) */
2824 config_sets->ac = ac;
2825 config_sets->next = NULL;
2826 config_sets->rootable = 0;
2827 ac->next = NULL;
2828 } else {
2829 /* which set does this component fit into? */
2830 cset = config_sets;
2831 while(cset!=NULL) {
2832 if (rf_does_it_fit(cset, ac)) {
2833 /* looks like it matches... */
2834 ac->next = cset->ac;
2835 cset->ac = ac;
2836 break;
2837 }
2838 cset = cset->next;
2839 }
2840 if (cset==NULL) {
2841 /* didn't find a match above... new set..*/
2842 cset = (RF_ConfigSet_t *)
2843 malloc(sizeof(RF_ConfigSet_t),
2844 M_RAIDFRAME, M_NOWAIT);
2845 if (cset == NULL) {
2846 panic("rf_create_auto_sets: No memory!\n");
2847 }
2848 cset->ac = ac;
2849 ac->next = NULL;
2850 cset->next = config_sets;
2851 cset->rootable = 0;
2852 config_sets = cset;
2853 }
2854 }
2855 ac = ac_next;
2856 }
2857
2858
2859 return(config_sets);
2860 }
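/*
 * The result is one RF_ConfigSet_t per distinct RAID set discovered,
 * each holding the chain of RF_AutoConfig_t entries that
 * rf_does_it_fit() judged to belong together.
 */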
2861
2862 static int
2863 rf_does_it_fit(cset, ac)
2864 RF_ConfigSet_t *cset;
2865 RF_AutoConfig_t *ac;
2866 {
2867 RF_ComponentLabel_t *clabel1, *clabel2;
2868
2869 /* If this one matches the *first* one in the set, that's good
2870 enough, since the other members of the set would have been
2871 through here too... */
2872 /* note that we are not checking partitionSize here..
2873
2874 Note that we are also not checking the mod_counters here.
2875 	   If everything else matches except the mod_counter, that's
2876 good enough for this test. We will deal with the mod_counters
2877 a little later in the autoconfiguration process.
2878
2879 (clabel1->mod_counter == clabel2->mod_counter) &&
2880
2881 The reason we don't check for this is that failed disks
2882 will have lower modification counts. If those disks are
2883 not added to the set they used to belong to, then they will
2884 form their own set, which may result in 2 different sets,
2885 for example, competing to be configured at raid0, and
2886 perhaps competing to be the root filesystem set. If the
2887 wrong ones get configured, or both attempt to become /,
2888 	   weird behaviour and/or serious lossage will occur. Thus we
2889 need to bring them into the fold here, and kick them out at
2890 a later point.
2891
2892 */
2893
2894 clabel1 = cset->ac->clabel;
2895 clabel2 = ac->clabel;
2896 if ((clabel1->version == clabel2->version) &&
2897 (clabel1->serial_number == clabel2->serial_number) &&
2898 (clabel1->num_rows == clabel2->num_rows) &&
2899 (clabel1->num_columns == clabel2->num_columns) &&
2900 (clabel1->sectPerSU == clabel2->sectPerSU) &&
2901 (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2902 (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2903 (clabel1->parityConfig == clabel2->parityConfig) &&
2904 (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2905 (clabel1->blockSize == clabel2->blockSize) &&
2906 (clabel1->numBlocks == clabel2->numBlocks) &&
2907 (clabel1->autoconfigure == clabel2->autoconfigure) &&
2908 (clabel1->root_partition == clabel2->root_partition) &&
2909 (clabel1->last_unit == clabel2->last_unit) &&
2910 (clabel1->config_order == clabel2->config_order)) {
2911 		/* if it gets here, it almost *has* to be a match */
2912 } else {
2913 /* it's not consistent with somebody in the set..
2914 punt */
2915 return(0);
2916 }
2917 /* all was fine.. it must fit... */
2918 return(1);
2919 }
2920
2921 int
2922 rf_have_enough_components(cset)
2923 RF_ConfigSet_t *cset;
2924 {
2925 RF_AutoConfig_t *ac;
2926 RF_AutoConfig_t *auto_config;
2927 RF_ComponentLabel_t *clabel;
2928 int r,c;
2929 int num_rows;
2930 int num_cols;
2931 int num_missing;
2932 int mod_counter;
2933 int mod_counter_found;
2934 int even_pair_failed;
2935 char parity_type;
2936
2937
2938 /* check to see that we have enough 'live' components
2939 of this set. If so, we can configure it if necessary */
2940
2941 num_rows = cset->ac->clabel->num_rows;
2942 num_cols = cset->ac->clabel->num_columns;
2943 parity_type = cset->ac->clabel->parityConfig;
2944
2945 /* XXX Check for duplicate components!?!?!? */
2946
2947 /* Determine what the mod_counter is supposed to be for this set. */
2948
2949 mod_counter_found = 0;
2950 mod_counter = 0;
2951 ac = cset->ac;
2952 while(ac!=NULL) {
2953 if (mod_counter_found==0) {
2954 mod_counter = ac->clabel->mod_counter;
2955 mod_counter_found = 1;
2956 } else {
2957 if (ac->clabel->mod_counter > mod_counter) {
2958 mod_counter = ac->clabel->mod_counter;
2959 }
2960 }
2961 ac = ac->next;
2962 }
2963
2964 num_missing = 0;
2965 auto_config = cset->ac;
2966
2967 for(r=0; r<num_rows; r++) {
2968 even_pair_failed = 0;
2969 for(c=0; c<num_cols; c++) {
2970 ac = auto_config;
2971 while(ac!=NULL) {
2972 if ((ac->clabel->row == r) &&
2973 (ac->clabel->column == c) &&
2974 (ac->clabel->mod_counter == mod_counter)) {
2975 /* it's this one... */
2976 #if DEBUG
2977 printf("Found: %s at %d,%d\n",
2978 ac->devname,r,c);
2979 #endif
2980 break;
2981 }
2982 ac=ac->next;
2983 }
2984 if (ac==NULL) {
2985 /* Didn't find one here! */
2986 /* special case for RAID 1, especially
2987 where there are more than 2
2988 components (where RAIDframe treats
2989 things a little differently :( ) */
2990 if (parity_type == '1') {
2991 if (c%2 == 0) { /* even component */
2992 even_pair_failed = 1;
2993 } else { /* odd component. If
2994 we're failed, and
2995 so is the even
2996 component, it's
2997 "Good Night, Charlie" */
2998 if (even_pair_failed == 1) {
2999 return(0);
3000 }
3001 }
3002 } else {
3003 /* normal accounting */
3004 num_missing++;
3005 }
3006 }
3007 if ((parity_type == '1') && (c%2 == 1)) {
3008 /* Just did an even component, and we didn't
3009 bail.. reset the even_pair_failed flag,
3010 and go on to the next component.... */
3011 even_pair_failed = 0;
3012 }
3013 }
3014 }
3015
3016 clabel = cset->ac->clabel;
3017
3018 if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3019 ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3020 ((clabel->parityConfig == '5') && (num_missing > 1))) {
3021 /* XXX this needs to be made *much* more general */
3022 /* Too many failures */
3023 return(0);
3024 }
3025 /* otherwise, all is well, and we've got enough to take a kick
3026 at autoconfiguring this set */
3027 return(1);
3028 }
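/*
 * The failure-tolerance rule applied above is deliberately crude:
 * RAID 0 tolerates no missing components, RAID 4 and RAID 5 tolerate
 * one, and RAID 1 is checked pairwise (a set is rejected only when
 * both halves of a mirror pair are missing).  Other parity
 * configurations would need the more general check the XXX above asks
 * for.
 */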
3029
3030 void
3031 rf_create_configuration(ac,config,raidPtr)
3032 RF_AutoConfig_t *ac;
3033 RF_Config_t *config;
3034 RF_Raid_t *raidPtr;
3035 {
3036 RF_ComponentLabel_t *clabel;
3037 int i;
3038
3039 clabel = ac->clabel;
3040
3041 /* 1. Fill in the common stuff */
3042 config->numRow = clabel->num_rows;
3043 config->numCol = clabel->num_columns;
3044 config->numSpare = 0; /* XXX should this be set here? */
3045 config->sectPerSU = clabel->sectPerSU;
3046 config->SUsPerPU = clabel->SUsPerPU;
3047 config->SUsPerRU = clabel->SUsPerRU;
3048 config->parityConfig = clabel->parityConfig;
3049 /* XXX... */
3050 strcpy(config->diskQueueType,"fifo");
3051 config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3052 config->layoutSpecificSize = 0; /* XXX ?? */
3053
3054 while(ac!=NULL) {
3055 /* row/col values will be in range due to the checks
3056 		   in rf_reasonable_label() */
3057 strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3058 ac->devname);
3059 ac = ac->next;
3060 }
3061
3062 for(i=0;i<RF_MAXDBGV;i++) {
3063 		config->debugVars[i][0] = '\0';
3064 }
3065 }
3066
3067 int
3068 rf_set_autoconfig(raidPtr, new_value)
3069 RF_Raid_t *raidPtr;
3070 int new_value;
3071 {
3072 RF_ComponentLabel_t clabel;
3073 struct vnode *vp;
3074 dev_t dev;
3075 int row, column;
3076
3077 raidPtr->autoconfigure = new_value;
3078 for(row=0; row<raidPtr->numRow; row++) {
3079 for(column=0; column<raidPtr->numCol; column++) {
3080 if (raidPtr->Disks[row][column].status ==
3081 rf_ds_optimal) {
3082 dev = raidPtr->Disks[row][column].dev;
3083 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3084 raidread_component_label(dev, vp, &clabel);
3085 clabel.autoconfigure = new_value;
3086 raidwrite_component_label(dev, vp, &clabel);
3087 }
3088 }
3089 }
3090 return(new_value);
3091 }
3092
3093 int
3094 rf_set_rootpartition(raidPtr, new_value)
3095 RF_Raid_t *raidPtr;
3096 int new_value;
3097 {
3098 RF_ComponentLabel_t clabel;
3099 struct vnode *vp;
3100 dev_t dev;
3101 int row, column;
3102
3103 raidPtr->root_partition = new_value;
3104 for(row=0; row<raidPtr->numRow; row++) {
3105 for(column=0; column<raidPtr->numCol; column++) {
3106 if (raidPtr->Disks[row][column].status ==
3107 rf_ds_optimal) {
3108 dev = raidPtr->Disks[row][column].dev;
3109 vp = raidPtr->raid_cinfo[row][column].ci_vp;
3110 raidread_component_label(dev, vp, &clabel);
3111 clabel.root_partition = new_value;
3112 raidwrite_component_label(dev, vp, &clabel);
3113 }
3114 }
3115 }
3116 return(new_value);
3117 }
3118
3119 void
3120 rf_release_all_vps(cset)
3121 RF_ConfigSet_t *cset;
3122 {
3123 RF_AutoConfig_t *ac;
3124
3125 ac = cset->ac;
3126 while(ac!=NULL) {
3127 /* Close the vp, and give it back */
3128 if (ac->vp) {
3129 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3130 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3131 vput(ac->vp);
3132 ac->vp = NULL;
3133 }
3134 ac = ac->next;
3135 }
3136 }
3137
3138
3139 void
3140 rf_cleanup_config_set(cset)
3141 RF_ConfigSet_t *cset;
3142 {
3143 RF_AutoConfig_t *ac;
3144 RF_AutoConfig_t *next_ac;
3145
3146 ac = cset->ac;
3147 while(ac!=NULL) {
3148 next_ac = ac->next;
3149 /* nuke the label */
3150 free(ac->clabel, M_RAIDFRAME);
3151 /* cleanup the config structure */
3152 free(ac, M_RAIDFRAME);
3153 /* "next.." */
3154 ac = next_ac;
3155 }
3156 /* and, finally, nuke the config set */
3157 free(cset, M_RAIDFRAME);
3158 }
3159
3160
3161 void
3162 raid_init_component_label(raidPtr, clabel)
3163 RF_Raid_t *raidPtr;
3164 RF_ComponentLabel_t *clabel;
3165 {
3166 /* current version number */
3167 clabel->version = RF_COMPONENT_LABEL_VERSION;
3168 clabel->serial_number = raidPtr->serial_number;
3169 clabel->mod_counter = raidPtr->mod_counter;
3170 clabel->num_rows = raidPtr->numRow;
3171 clabel->num_columns = raidPtr->numCol;
3172 clabel->clean = RF_RAID_DIRTY; /* not clean */
3173 clabel->status = rf_ds_optimal; /* "It's good!" */
3174
3175 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3176 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3177 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3178
3179 clabel->blockSize = raidPtr->bytesPerSector;
3180 clabel->numBlocks = raidPtr->sectorsPerDisk;
3181
3182 /* XXX not portable */
3183 clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3184 clabel->maxOutstanding = raidPtr->maxOutstanding;
3185 clabel->autoconfigure = raidPtr->autoconfigure;
3186 clabel->root_partition = raidPtr->root_partition;
3187 clabel->last_unit = raidPtr->raidid;
3188 clabel->config_order = raidPtr->config_order;
3189 }
3190
3191 int
3192 rf_auto_config_set(cset,unit)
3193 RF_ConfigSet_t *cset;
3194 int *unit;
3195 {
3196 RF_Raid_t *raidPtr;
3197 RF_Config_t *config;
3198 int raidID;
3199 int retcode;
3200
3201 printf("RAID autoconfigure\n");
3202
3203 retcode = 0;
3204 *unit = -1;
3205
3206 /* 1. Create a config structure */
3207
3208 config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3209 M_RAIDFRAME,
3210 M_NOWAIT);
3211 if (config==NULL) {
3212 printf("Out of mem!?!?\n");
3213 /* XXX do something more intelligent here. */
3214 return(1);
3215 }
3216
3217 memset(config, 0, sizeof(RF_Config_t));
3218
3219 /* XXX raidID needs to be set correctly.. */
3220
3221 /*
3222 2. Figure out what RAID ID this one is supposed to live at
3223 See if we can get the same RAID dev that it was configured
3224 on last time..
3225 */
3226
3227 raidID = cset->ac->clabel->last_unit;
3228 if ((raidID < 0) || (raidID >= numraid)) {
3229 /* let's not wander off into lala land. */
3230 raidID = numraid - 1;
3231 }
3232 if (raidPtrs[raidID]->valid != 0) {
3233
3234 /*
3235 Nope... Go looking for an alternative...
3236 Start high so we don't immediately use raid0 if that's
3237 not taken.
3238 */
3239
3240 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
3241 if (raidPtrs[raidID]->valid == 0) {
3242 /* can use this one! */
3243 break;
3244 }
3245 }
3246 }
3247
3248 if (raidID < 0) {
3249 /* punt... */
3250 printf("Unable to auto configure this set!\n");
3251 printf("(Out of RAID devs!)\n");
3252 return(1);
3253 }
3254 printf("Configuring raid%d:\n",raidID);
3255 raidPtr = raidPtrs[raidID];
3256
3257 /* XXX all this stuff should be done SOMEWHERE ELSE! */
3258 raidPtr->raidid = raidID;
3259 raidPtr->openings = RAIDOUTSTANDING;
3260
3261 /* 3. Build the configuration structure */
3262 rf_create_configuration(cset->ac, config, raidPtr);
3263
3264 /* 4. Do the configuration */
3265 retcode = rf_Configure(raidPtr, config, cset->ac);
3266
3267 if (retcode == 0) {
3268
3269 raidinit(raidPtrs[raidID]);
3270
3271 rf_markalldirty(raidPtrs[raidID]);
3272 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3273 if (cset->ac->clabel->root_partition==1) {
3274 /* everything configured just fine. Make a note
3275 that this set is eligible to be root. */
3276 cset->rootable = 1;
3277 /* XXX do this here? */
3278 raidPtrs[raidID]->root_partition = 1;
3279 }
3280 }
3281
3282 /* 5. Cleanup */
3283 free(config, M_RAIDFRAME);
3284
3285 *unit = raidID;
3286 return(retcode);
3287 }
3288
3289 void
3290 rf_disk_unbusy(desc)
3291 RF_RaidAccessDesc_t *desc;
3292 {
3293 struct buf *bp;
3294
3295 bp = (struct buf *)desc->bp;
3296 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3297 (bp->b_bcount - bp->b_resid));
3298 }
3299