/*	$NetBSD: rf_netbsdkintf.c,v 1.45 2000/01/09 02:56:13 oster Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_copyback.h"
141 #include "rf_dag.h"
142 #include "rf_dagflags.h"
143 #include "rf_diskqueue.h"
144 #include "rf_acctrace.h"
145 #include "rf_etimer.h"
146 #include "rf_general.h"
147 #include "rf_debugMem.h"
148 #include "rf_kintf.h"
149 #include "rf_options.h"
150 #include "rf_driver.h"
151 #include "rf_parityscan.h"
152 #include "rf_debugprint.h"
153 #include "rf_threadstuff.h"
154
/* Run-time verbosity knob for the dbN_printf() debug macros below. */
int rf_kdebug_level = 0;

#ifdef DEBUG
/*
 * dbN_printf((fmt, args)) prints only when rf_kdebug_level exceeds N-1.
 * The argument list is doubly parenthesized at the call site so the whole
 * list can be handed to printf unchanged.
 */
#define db0_printf(a) printf a
#define db_printf(a) if (rf_kdebug_level > 0) printf a
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#define db2_printf(a) if (rf_kdebug_level > 1) printf a
#define db3_printf(a) if (rf_kdebug_level > 2) printf a
#define db4_printf(a) if (rf_kdebug_level > 3) printf a
#define db5_printf(a) if (rf_kdebug_level > 4) printf a
#else				/* DEBUG */
/* Non-DEBUG kernels: level 0 always prints, all other levels compile away. */
#define db0_printf(a) printf a
#define db1_printf(a) { }
#define db2_printf(a) { }
#define db3_printf(a) { }
#define db4_printf(a) { }
#define db5_printf(a) { }
#endif				/* DEBUG */
173
/* One RF_Raid_t descriptor per possible unit; array allocated in raidattach(). */
static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

/*
 * Queues used to hand spare-table installation requests to, and collect
 * responses from, the installation process.  Both are reset to NULL in
 * raidattach(); presumably guarded by rf_sparet_wait_mutex -- confirm.
 */
static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */

/* prototypes */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
		   dev_t dev, RF_SectorNum_t startSect,
		   RF_SectorCount_t numSect, caddr_t buf,
		   void (*cbFunc) (struct buf *), void *cbArg,
		   int logBytesPerSector, struct proc * b_proc);

/* autoconf and device-switch entry points */
void raidattach __P((int));
int raidsize __P((dev_t));

static int raidinit __P((dev_t, RF_Raid_t *, int));

int raidopen __P((dev_t, int, int, struct proc *));
int raidclose __P((dev_t, int, int, struct proc *));
int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
int raidwrite __P((dev_t, struct uio *, int));
int raidread __P((dev_t, struct uio *, int));
void raidstrategy __P((struct buf *));
int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
203
/*
 * Pilfered from ccd.c
 */

/*
 * Per-component I/O carrier: wraps the buf sent to a component disk and
 * remembers the original buf and RAIDframe request it belongs to.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!!
				 * (so a struct buf * can be cast back to
				 * a struct raidbuf *) */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int rf_flags;		/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};

/* Get/put a raidbuf from the per-unit pool; PR_NOWAIT: never sleep. */
#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
218
/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that...
*/

/* Per-unit driver state, parallel to the raidPtrs[] descriptor array. */
struct raid_softc {
	int sc_flags;		/* flags (RAIDF_* below) */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	dev_t sc_dev;		/* our device.. */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Extract the unit number from a device minor. */
#define	raidunit(x)	DISKUNIT(x)
/* Number of configured units; set (once) in raidattach(). */
static int numraid = 0;
241
/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even it if is used immedately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* The raw-partition device node that carries the disklabel for a unit. */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

/* disklabel handling */
static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
				     struct disklabel *));
static void raidgetdisklabel __P((dev_t));
static void raidmakedisklabel __P((struct raid_softc *));

/* per-unit configuration lock */
static int raidlock __P((struct raid_softc *));
static void raidunlock __P((struct raid_softc *));

static void rf_markalldirty __P((RF_Raid_t *));

/* kernel-thread entry points for long-running operations */
void rf_ReconThread __P((struct rf_recon_req *));
/* XXX what I want is: */
/*void rf_ReconThread __P((RF_Raid_t *raidPtr));  */
void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
void rf_CopybackThread __P((RF_Raid_t *raidPtr));
void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
285
286 void
287 raidattach(num)
288 int num;
289 {
290 int raidID;
291 int i, rc;
292
293 #ifdef DEBUG
294 printf("raidattach: Asked for %d units\n", num);
295 #endif
296
297 if (num <= 0) {
298 #ifdef DIAGNOSTIC
299 panic("raidattach: count <= 0");
300 #endif
301 return;
302 }
303 /* This is where all the initialization stuff gets done. */
304
305 /* Make some space for requested number of units... */
306
307 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
308 if (raidPtrs == NULL) {
309 panic("raidPtrs is NULL!!\n");
310 }
311
312 rc = rf_mutex_init(&rf_sparet_wait_mutex);
313 if (rc) {
314 RF_PANIC();
315 }
316
317 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
318
319 for (i = 0; i < numraid; i++)
320 raidPtrs[i] = NULL;
321 rc = rf_BootRaidframe();
322 if (rc == 0)
323 printf("Kernelized RAIDframe activated\n");
324 else
325 panic("Serious error booting RAID!!\n");
326
327 /* put together some datastructures like the CCD device does.. This
328 * lets us lock the device and what-not when it gets opened. */
329
330 raid_softc = (struct raid_softc *)
331 malloc(num * sizeof(struct raid_softc),
332 M_RAIDFRAME, M_NOWAIT);
333 if (raid_softc == NULL) {
334 printf("WARNING: no memory for RAIDframe driver\n");
335 return;
336 }
337 numraid = num;
338 bzero(raid_softc, num * sizeof(struct raid_softc));
339
340 for (raidID = 0; raidID < num; raidID++) {
341 raid_softc[raidID].buf_queue.b_actf = NULL;
342 raid_softc[raidID].buf_queue.b_actb =
343 &raid_softc[raidID].buf_queue.b_actf;
344 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
345 (RF_Raid_t *));
346 if (raidPtrs[raidID] == NULL) {
347 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
348 numraid = raidID;
349 return;
350 }
351 }
352 }
353
354
355 int
356 raidsize(dev)
357 dev_t dev;
358 {
359 struct raid_softc *rs;
360 struct disklabel *lp;
361 int part, unit, omask, size;
362
363 unit = raidunit(dev);
364 if (unit >= numraid)
365 return (-1);
366 rs = &raid_softc[unit];
367
368 if ((rs->sc_flags & RAIDF_INITED) == 0)
369 return (-1);
370
371 part = DISKPART(dev);
372 omask = rs->sc_dkdev.dk_openmask & (1 << part);
373 lp = rs->sc_dkdev.dk_label;
374
375 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
376 return (-1);
377
378 if (lp->d_partitions[part].p_fstype != FS_SWAP)
379 size = -1;
380 else
381 size = lp->d_partitions[part].p_size *
382 (lp->d_secsize / DEV_BSIZE);
383
384 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
385 return (-1);
386
387 return (size);
388
389 }
390
391 int
392 raiddump(dev, blkno, va, size)
393 dev_t dev;
394 daddr_t blkno;
395 caddr_t va;
396 size_t size;
397 {
398 /* Not implemented. */
399 return ENXIO;
400 }
/* ARGSUSED */
/*
 * raidopen: open a partition of a RAID unit.
 *
 * Takes the per-unit lock for the duration; re-reads the disklabel on
 * first open of a configured unit; rejects opens of partitions that do
 * not exist; records the open in the char/block open masks so the unit
 * cannot be unconfigured while in use; and marks components dirty on
 * the very first open of a configured set.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* serialize against close/unconfigure */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));


	/* first open of a configured unit: (re)load the disklabel */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	/* combined mask reflects both char and block opens */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * raidclose: close a partition of a RAID unit.
 *
 * Clears this partition's bit in the appropriate open mask; when the
 * last partition of a configured unit is closed, updates the component
 * labels so the set is recorded as cleanly shut down.
 */
int
raidclose(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* serialize against open/unconfigure */
	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */
		rf_update_component_labels( raidPtrs[unit] );
	}

	raidunlock(rs);
	return (0);

}
527
528 void
529 raidstrategy(bp)
530 register struct buf *bp;
531 {
532 register int s;
533
534 unsigned int raidID = raidunit(bp->b_dev);
535 RF_Raid_t *raidPtr;
536 struct raid_softc *rs = &raid_softc[raidID];
537 struct disklabel *lp;
538 struct buf *dp;
539 int wlabel;
540
541 if ((rs->sc_flags & RAIDF_INITED) ==0) {
542 bp->b_error = ENXIO;
543 bp->b_flags = B_ERROR;
544 bp->b_resid = bp->b_bcount;
545 biodone(bp);
546 return;
547 }
548 if (raidID >= numraid || !raidPtrs[raidID]) {
549 bp->b_error = ENODEV;
550 bp->b_flags |= B_ERROR;
551 bp->b_resid = bp->b_bcount;
552 biodone(bp);
553 return;
554 }
555 raidPtr = raidPtrs[raidID];
556 if (!raidPtr->valid) {
557 bp->b_error = ENODEV;
558 bp->b_flags |= B_ERROR;
559 bp->b_resid = bp->b_bcount;
560 biodone(bp);
561 return;
562 }
563 if (bp->b_bcount == 0) {
564 db1_printf(("b_bcount is zero..\n"));
565 biodone(bp);
566 return;
567 }
568 lp = rs->sc_dkdev.dk_label;
569
570 /*
571 * Do bounds checking and adjust transfer. If there's an
572 * error, the bounds check will flag that for us.
573 */
574
575 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
576 if (DISKPART(bp->b_dev) != RAW_PART)
577 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
578 db1_printf(("Bounds check failed!!:%d %d\n",
579 (int) bp->b_blkno, (int) wlabel));
580 biodone(bp);
581 return;
582 }
583 s = splbio();
584
585 bp->b_resid = 0;
586
587 /* stuff it onto our queue */
588
589 dp = &rs->buf_queue;
590 bp->b_actf = NULL;
591 bp->b_actb = dp->b_actb;
592 *dp->b_actb = bp;
593 dp->b_actb = &bp->b_actf;
594
595 raidstart(raidPtrs[raidID]);
596
597 splx(s);
598 }
599 /* ARGSUSED */
600 int
601 raidread(dev, uio, flags)
602 dev_t dev;
603 struct uio *uio;
604 int flags;
605 {
606 int unit = raidunit(dev);
607 struct raid_softc *rs;
608 int part;
609
610 if (unit >= numraid)
611 return (ENXIO);
612 rs = &raid_softc[unit];
613
614 if ((rs->sc_flags & RAIDF_INITED) == 0)
615 return (ENXIO);
616 part = DISKPART(dev);
617
618 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
619
620 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
621
622 }
623 /* ARGSUSED */
624 int
625 raidwrite(dev, uio, flags)
626 dev_t dev;
627 struct uio *uio;
628 int flags;
629 {
630 int unit = raidunit(dev);
631 struct raid_softc *rs;
632
633 if (unit >= numraid)
634 return (ENXIO);
635 rs = &raid_softc[unit];
636
637 if ((rs->sc_flags & RAIDF_INITED) == 0)
638 return (ENXIO);
639 db1_printf(("raidwrite\n"));
640 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
641
642 }
643
644 int
645 raidioctl(dev, cmd, data, flag, p)
646 dev_t dev;
647 u_long cmd;
648 caddr_t data;
649 int flag;
650 struct proc *p;
651 {
652 int unit = raidunit(dev);
653 int error = 0;
654 int part, pmask;
655 struct raid_softc *rs;
656 RF_Config_t *k_cfg, *u_cfg;
657 RF_Raid_t *raidPtr;
658 RF_AccTotals_t *totals;
659 RF_DeviceConfig_t *d_cfg, **ucfgp;
660 u_char *specific_buf;
661 int retcode = 0;
662 int row;
663 int column;
664 struct rf_recon_req *rrcopy, *rr;
665 RF_ComponentLabel_t *component_label;
666 RF_ComponentLabel_t ci_label;
667 RF_ComponentLabel_t **c_label_ptr;
668 RF_SingleComponent_t *sparePtr,*componentPtr;
669 RF_SingleComponent_t hot_spare;
670 RF_SingleComponent_t component;
671 int i, j, d;
672
673 if (unit >= numraid)
674 return (ENXIO);
675 rs = &raid_softc[unit];
676 raidPtr = raidPtrs[unit];
677
678 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
679 (int) DISKPART(dev), (int) unit, (int) cmd));
680
681 /* Must be open for writes for these commands... */
682 switch (cmd) {
683 case DIOCSDINFO:
684 case DIOCWDINFO:
685 case DIOCWLABEL:
686 if ((flag & FWRITE) == 0)
687 return (EBADF);
688 }
689
690 /* Must be initialized for these... */
691 switch (cmd) {
692 case DIOCGDINFO:
693 case DIOCSDINFO:
694 case DIOCWDINFO:
695 case DIOCGPART:
696 case DIOCWLABEL:
697 case DIOCGDEFLABEL:
698 case RAIDFRAME_SHUTDOWN:
699 case RAIDFRAME_REWRITEPARITY:
700 case RAIDFRAME_GET_INFO:
701 case RAIDFRAME_RESET_ACCTOTALS:
702 case RAIDFRAME_GET_ACCTOTALS:
703 case RAIDFRAME_KEEP_ACCTOTALS:
704 case RAIDFRAME_GET_SIZE:
705 case RAIDFRAME_FAIL_DISK:
706 case RAIDFRAME_COPYBACK:
707 case RAIDFRAME_CHECK_RECON_STATUS:
708 case RAIDFRAME_GET_COMPONENT_LABEL:
709 case RAIDFRAME_SET_COMPONENT_LABEL:
710 case RAIDFRAME_ADD_HOT_SPARE:
711 case RAIDFRAME_REMOVE_HOT_SPARE:
712 case RAIDFRAME_INIT_LABELS:
713 case RAIDFRAME_REBUILD_IN_PLACE:
714 case RAIDFRAME_CHECK_PARITY:
715 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
716 case RAIDFRAME_CHECK_COPYBACK_STATUS:
717 if ((rs->sc_flags & RAIDF_INITED) == 0)
718 return (ENXIO);
719 }
720
721 switch (cmd) {
722
723 /* configure the system */
724 case RAIDFRAME_CONFIGURE:
725 /* copy-in the configuration information */
726 /* data points to a pointer to the configuration structure */
727
728 u_cfg = *((RF_Config_t **) data);
729 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
730 if (k_cfg == NULL) {
731 return (ENOMEM);
732 }
733 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
734 sizeof(RF_Config_t));
735 if (retcode) {
736 RF_Free(k_cfg, sizeof(RF_Config_t));
737 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
738 retcode));
739 return (retcode);
740 }
741 /* allocate a buffer for the layout-specific data, and copy it
742 * in */
743 if (k_cfg->layoutSpecificSize) {
744 if (k_cfg->layoutSpecificSize > 10000) {
745 /* sanity check */
746 RF_Free(k_cfg, sizeof(RF_Config_t));
747 return (EINVAL);
748 }
749 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
750 (u_char *));
751 if (specific_buf == NULL) {
752 RF_Free(k_cfg, sizeof(RF_Config_t));
753 return (ENOMEM);
754 }
755 retcode = copyin(k_cfg->layoutSpecific,
756 (caddr_t) specific_buf,
757 k_cfg->layoutSpecificSize);
758 if (retcode) {
759 RF_Free(k_cfg, sizeof(RF_Config_t));
760 RF_Free(specific_buf,
761 k_cfg->layoutSpecificSize);
762 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
763 retcode));
764 return (retcode);
765 }
766 } else
767 specific_buf = NULL;
768 k_cfg->layoutSpecific = specific_buf;
769
770 /* should do some kind of sanity check on the configuration.
771 * Store the sum of all the bytes in the last byte? */
772
773 /* configure the system */
774
775 raidPtr->raidid = unit;
776
777 retcode = rf_Configure(raidPtr, k_cfg);
778
779 if (retcode == 0) {
780
781 /* allow this many simultaneous IO's to
782 this RAID device */
783 raidPtr->openings = RAIDOUTSTANDING;
784
785 /* XXX should be moved to rf_Configure() */
786
787 raidPtr->copyback_in_progress = 0;
788 raidPtr->parity_rewrite_in_progress = 0;
789 raidPtr->recon_in_progress = 0;
790
791 retcode = raidinit(dev, raidPtr, unit);
792 rf_markalldirty( raidPtr );
793 }
794 /* free the buffers. No return code here. */
795 if (k_cfg->layoutSpecificSize) {
796 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
797 }
798 RF_Free(k_cfg, sizeof(RF_Config_t));
799
800 return (retcode);
801
802 /* shutdown the system */
803 case RAIDFRAME_SHUTDOWN:
804
805 if ((error = raidlock(rs)) != 0)
806 return (error);
807
808 /*
809 * If somebody has a partition mounted, we shouldn't
810 * shutdown.
811 */
812
813 part = DISKPART(dev);
814 pmask = (1 << part);
815 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
816 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
817 (rs->sc_dkdev.dk_copenmask & pmask))) {
818 raidunlock(rs);
819 return (EBUSY);
820 }
821
822 retcode = rf_Shutdown(raidPtr);
823
824 pool_destroy(&rs->sc_cbufpool);
825
826 /* It's no longer initialized... */
827 rs->sc_flags &= ~RAIDF_INITED;
828
829 /* Detach the disk. */
830 disk_detach(&rs->sc_dkdev);
831
832 raidunlock(rs);
833
834 return (retcode);
835 case RAIDFRAME_GET_COMPONENT_LABEL:
836 c_label_ptr = (RF_ComponentLabel_t **) data;
837 /* need to read the component label for the disk indicated
838 by row,column in component_label */
839
840 /* For practice, let's get it directly fromdisk, rather
841 than from the in-core copy */
842 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
843 (RF_ComponentLabel_t *));
844 if (component_label == NULL)
845 return (ENOMEM);
846
847 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
848
849 retcode = copyin( *c_label_ptr, component_label,
850 sizeof(RF_ComponentLabel_t));
851
852 if (retcode) {
853 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
854 return(retcode);
855 }
856
857 row = component_label->row;
858 column = component_label->column;
859
860 if ((row < 0) || (row >= raidPtr->numRow) ||
861 (column < 0) || (column >= raidPtr->numCol)) {
862 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
863 return(EINVAL);
864 }
865
866 raidread_component_label(
867 raidPtr->Disks[row][column].dev,
868 raidPtr->raid_cinfo[row][column].ci_vp,
869 component_label );
870
871 retcode = copyout((caddr_t) component_label,
872 (caddr_t) *c_label_ptr,
873 sizeof(RF_ComponentLabel_t));
874 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
875 return (retcode);
876
877 case RAIDFRAME_SET_COMPONENT_LABEL:
878 component_label = (RF_ComponentLabel_t *) data;
879
880 /* XXX check the label for valid stuff... */
881 /* Note that some things *should not* get modified --
882 the user should be re-initing the labels instead of
883 trying to patch things.
884 */
885
886 printf("Got component label:\n");
887 printf("Version: %d\n",component_label->version);
888 printf("Serial Number: %d\n",component_label->serial_number);
889 printf("Mod counter: %d\n",component_label->mod_counter);
890 printf("Row: %d\n", component_label->row);
891 printf("Column: %d\n", component_label->column);
892 printf("Num Rows: %d\n", component_label->num_rows);
893 printf("Num Columns: %d\n", component_label->num_columns);
894 printf("Clean: %d\n", component_label->clean);
895 printf("Status: %d\n", component_label->status);
896
897 row = component_label->row;
898 column = component_label->column;
899
900 if ((row < 0) || (row >= raidPtr->numRow) ||
901 (column < 0) || (column >= raidPtr->numCol)) {
902 return(EINVAL);
903 }
904
905 /* XXX this isn't allowed to do anything for now :-) */
906 #if 0
907 raidwrite_component_label(
908 raidPtr->Disks[row][column].dev,
909 raidPtr->raid_cinfo[row][column].ci_vp,
910 component_label );
911 #endif
912 return (0);
913
914 case RAIDFRAME_INIT_LABELS:
915 component_label = (RF_ComponentLabel_t *) data;
916 /*
917 we only want the serial number from
918 the above. We get all the rest of the information
919 from the config that was used to create this RAID
920 set.
921 */
922
923 raidPtr->serial_number = component_label->serial_number;
924 /* current version number */
925 ci_label.version = RF_COMPONENT_LABEL_VERSION;
926 ci_label.serial_number = component_label->serial_number;
927 ci_label.mod_counter = raidPtr->mod_counter;
928 ci_label.num_rows = raidPtr->numRow;
929 ci_label.num_columns = raidPtr->numCol;
930 ci_label.clean = RF_RAID_DIRTY; /* not clean */
931 ci_label.status = rf_ds_optimal; /* "It's good!" */
932
933 for(row=0;row<raidPtr->numRow;row++) {
934 ci_label.row = row;
935 for(column=0;column<raidPtr->numCol;column++) {
936 ci_label.column = column;
937 raidwrite_component_label(
938 raidPtr->Disks[row][column].dev,
939 raidPtr->raid_cinfo[row][column].ci_vp,
940 &ci_label );
941 }
942 }
943
944 return (retcode);
945
946 /* initialize all parity */
947 case RAIDFRAME_REWRITEPARITY:
948
949 if (raidPtr->Layout.map->faultsTolerated == 0) {
950 /* Parity for RAID 0 is trivially correct */
951 raidPtr->parity_good = RF_RAID_CLEAN;
952 return(0);
953 }
954
955 if (raidPtr->parity_rewrite_in_progress == 1) {
956 /* Re-write is already in progress! */
957 return(EINVAL);
958 }
959
960 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
961 rf_RewriteParityThread,
962 raidPtr,"raid_parity");
963 return (retcode);
964
965
966 case RAIDFRAME_ADD_HOT_SPARE:
967 sparePtr = (RF_SingleComponent_t *) data;
968 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
969 printf("Adding spare\n");
970 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
971 return(retcode);
972
973 case RAIDFRAME_REMOVE_HOT_SPARE:
974 return(retcode);
975
976 case RAIDFRAME_REBUILD_IN_PLACE:
977
978 if (raidPtr->Layout.map->faultsTolerated == 0) {
979 /* Can't do this on a RAID 0!! */
980 return(EINVAL);
981 }
982
983 if (raidPtr->recon_in_progress == 1) {
984 /* a reconstruct is already in progress! */
985 return(EINVAL);
986 }
987
988 componentPtr = (RF_SingleComponent_t *) data;
989 memcpy( &component, componentPtr,
990 sizeof(RF_SingleComponent_t));
991 row = component.row;
992 column = component.column;
993 printf("Rebuild: %d %d\n",row, column);
994 if ((row < 0) || (row >= raidPtr->numRow) ||
995 (column < 0) || (column >= raidPtr->numCol)) {
996 return(EINVAL);
997 }
998
999 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1000 if (rrcopy == NULL)
1001 return(ENOMEM);
1002
1003 rrcopy->raidPtr = (void *) raidPtr;
1004 rrcopy->row = row;
1005 rrcopy->col = column;
1006
1007 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1008 rf_ReconstructInPlaceThread,
1009 rrcopy,"raid_reconip");
1010 return(retcode);
1011
1012 case RAIDFRAME_GET_INFO:
1013 if (!raidPtr->valid)
1014 return (ENODEV);
1015 ucfgp = (RF_DeviceConfig_t **) data;
1016 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1017 (RF_DeviceConfig_t *));
1018 if (d_cfg == NULL)
1019 return (ENOMEM);
1020 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1021 d_cfg->rows = raidPtr->numRow;
1022 d_cfg->cols = raidPtr->numCol;
1023 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1024 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1025 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1026 return (ENOMEM);
1027 }
1028 d_cfg->nspares = raidPtr->numSpare;
1029 if (d_cfg->nspares >= RF_MAX_DISKS) {
1030 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1031 return (ENOMEM);
1032 }
1033 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1034 d = 0;
1035 for (i = 0; i < d_cfg->rows; i++) {
1036 for (j = 0; j < d_cfg->cols; j++) {
1037 d_cfg->devs[d] = raidPtr->Disks[i][j];
1038 d++;
1039 }
1040 }
1041 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1042 d_cfg->spares[i] = raidPtr->Disks[0][j];
1043 }
1044 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1045 sizeof(RF_DeviceConfig_t));
1046 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1047
1048 return (retcode);
1049
1050 case RAIDFRAME_CHECK_PARITY:
1051 *(int *) data = raidPtr->parity_good;
1052 return (0);
1053
1054 case RAIDFRAME_RESET_ACCTOTALS:
1055 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1056 return (0);
1057
1058 case RAIDFRAME_GET_ACCTOTALS:
1059 totals = (RF_AccTotals_t *) data;
1060 *totals = raidPtr->acc_totals;
1061 return (0);
1062
1063 case RAIDFRAME_KEEP_ACCTOTALS:
1064 raidPtr->keep_acc_totals = *(int *)data;
1065 return (0);
1066
1067 case RAIDFRAME_GET_SIZE:
1068 *(int *) data = raidPtr->totalSectors;
1069 return (0);
1070
1071 /* fail a disk & optionally start reconstruction */
1072 case RAIDFRAME_FAIL_DISK:
1073
1074 if (raidPtr->Layout.map->faultsTolerated == 0) {
1075 /* Can't do this on a RAID 0!! */
1076 return(EINVAL);
1077 }
1078
1079 rr = (struct rf_recon_req *) data;
1080
1081 if (rr->row < 0 || rr->row >= raidPtr->numRow
1082 || rr->col < 0 || rr->col >= raidPtr->numCol)
1083 return (EINVAL);
1084
1085 printf("raid%d: Failing the disk: row: %d col: %d\n",
1086 unit, rr->row, rr->col);
1087
1088 /* make a copy of the recon request so that we don't rely on
1089 * the user's buffer */
1090 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1091 if (rrcopy == NULL)
1092 return(ENOMEM);
1093 bcopy(rr, rrcopy, sizeof(*rr));
1094 rrcopy->raidPtr = (void *) raidPtr;
1095
1096 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1097 rf_ReconThread,
1098 rrcopy,"raid_recon");
1099 return (0);
1100
1101 /* invoke a copyback operation after recon on whatever disk
1102 * needs it, if any */
1103 case RAIDFRAME_COPYBACK:
1104
1105 if (raidPtr->Layout.map->faultsTolerated == 0) {
1106 /* This makes no sense on a RAID 0!! */
1107 return(EINVAL);
1108 }
1109
1110 if (raidPtr->copyback_in_progress == 1) {
1111 /* Copyback is already in progress! */
1112 return(EINVAL);
1113 }
1114
1115 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1116 rf_CopybackThread,
1117 raidPtr,"raid_copyback");
1118 return (retcode);
1119
1120 /* return the percentage completion of reconstruction */
1121 case RAIDFRAME_CHECK_RECON_STATUS:
1122 if (raidPtr->Layout.map->faultsTolerated == 0) {
1123 /* This makes no sense on a RAID 0 */
1124 return(EINVAL);
1125 }
1126 row = 0; /* XXX we only consider a single row... */
1127 if (raidPtr->status[row] != rf_rs_reconstructing)
1128 *(int *) data = 100;
1129 else
1130 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1131 return (0);
1132
1133 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1134 if (raidPtr->Layout.map->faultsTolerated == 0) {
1135 /* This makes no sense on a RAID 0 */
1136 return(EINVAL);
1137 }
1138 if (raidPtr->parity_rewrite_in_progress == 1) {
1139 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1140 } else {
1141 *(int *) data = 100;
1142 }
1143 return (0);
1144
1145 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1146 if (raidPtr->Layout.map->faultsTolerated == 0) {
1147 /* This makes no sense on a RAID 0 */
1148 return(EINVAL);
1149 }
1150 if (raidPtr->copyback_in_progress == 1) {
1151 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1152 raidPtr->Layout.numStripe;
1153 } else {
1154 *(int *) data = 100;
1155 }
1156 return (0);
1157
1158
1159 /* the sparetable daemon calls this to wait for the kernel to
1160 * need a spare table. this ioctl does not return until a
1161 * spare table is needed. XXX -- calling mpsleep here in the
1162 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1163 * -- I should either compute the spare table in the kernel,
1164 * or have a different -- XXX XXX -- interface (a different
1165 * character device) for delivering the table -- XXX */
1166 #if 0
1167 case RAIDFRAME_SPARET_WAIT:
1168 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1169 while (!rf_sparet_wait_queue)
1170 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1171 waitreq = rf_sparet_wait_queue;
1172 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1173 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1174
1175 /* structure assignment */
1176 *((RF_SparetWait_t *) data) = *waitreq;
1177
1178 RF_Free(waitreq, sizeof(*waitreq));
1179 return (0);
1180
1181 /* wakes up a process waiting on SPARET_WAIT and puts an error
1182 * code in it that will cause the dameon to exit */
1183 case RAIDFRAME_ABORT_SPARET_WAIT:
1184 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1185 waitreq->fcol = -1;
1186 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1187 waitreq->next = rf_sparet_wait_queue;
1188 rf_sparet_wait_queue = waitreq;
1189 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1190 wakeup(&rf_sparet_wait_queue);
1191 return (0);
1192
1193 /* used by the spare table daemon to deliver a spare table
1194 * into the kernel */
1195 case RAIDFRAME_SEND_SPARET:
1196
1197 /* install the spare table */
1198 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1199
1200 /* respond to the requestor. the return status of the spare
1201 * table installation is passed in the "fcol" field */
1202 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1203 waitreq->fcol = retcode;
1204 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1205 waitreq->next = rf_sparet_resp_queue;
1206 rf_sparet_resp_queue = waitreq;
1207 wakeup(&rf_sparet_resp_queue);
1208 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1209
1210 return (retcode);
1211 #endif
1212
1213 default:
1214 break; /* fall through to the os-specific code below */
1215
1216 }
1217
1218 if (!raidPtr->valid)
1219 return (EINVAL);
1220
1221 /*
1222 * Add support for "regular" device ioctls here.
1223 */
1224
1225 switch (cmd) {
1226 case DIOCGDINFO:
1227 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1228 break;
1229
1230 case DIOCGPART:
1231 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1232 ((struct partinfo *) data)->part =
1233 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1234 break;
1235
1236 case DIOCWDINFO:
1237 case DIOCSDINFO:
1238 if ((error = raidlock(rs)) != 0)
1239 return (error);
1240
1241 rs->sc_flags |= RAIDF_LABELLING;
1242
1243 error = setdisklabel(rs->sc_dkdev.dk_label,
1244 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1245 if (error == 0) {
1246 if (cmd == DIOCWDINFO)
1247 error = writedisklabel(RAIDLABELDEV(dev),
1248 raidstrategy, rs->sc_dkdev.dk_label,
1249 rs->sc_dkdev.dk_cpulabel);
1250 }
1251 rs->sc_flags &= ~RAIDF_LABELLING;
1252
1253 raidunlock(rs);
1254
1255 if (error)
1256 return (error);
1257 break;
1258
1259 case DIOCWLABEL:
1260 if (*(int *) data != 0)
1261 rs->sc_flags |= RAIDF_WLABEL;
1262 else
1263 rs->sc_flags &= ~RAIDF_WLABEL;
1264 break;
1265
1266 case DIOCGDEFLABEL:
1267 raidgetdefaultlabel(raidPtr, rs,
1268 (struct disklabel *) data);
1269 break;
1270
1271 default:
1272 retcode = ENOTTY;
1273 }
1274 return (retcode);
1275
1276 }
1277
1278
1279 /* raidinit -- complete the rest of the initialization for the
1280 RAIDframe device. */
1281
1282
1283 static int
1284 raidinit(dev, raidPtr, unit)
1285 dev_t dev;
1286 RF_Raid_t *raidPtr;
1287 int unit;
1288 {
1289 int retcode;
1290 struct raid_softc *rs;
1291
1292 retcode = 0;
1293
1294 rs = &raid_softc[unit];
1295 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1296 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1297
1298
1299 /* XXX should check return code first... */
1300 rs->sc_flags |= RAIDF_INITED;
1301
1302 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1303
1304 rs->sc_dkdev.dk_name = rs->sc_xname;
1305
1306 /* disk_attach actually creates space for the CPU disklabel, among
1307 * other things, so it's critical to call this *BEFORE* we try putzing
1308 * with disklabels. */
1309
1310 disk_attach(&rs->sc_dkdev);
1311
1312 /* XXX There may be a weird interaction here between this, and
1313 * protectedSectors, as used in RAIDframe. */
1314
1315 rs->sc_size = raidPtr->totalSectors;
1316 rs->sc_dev = dev;
1317
1318 return (retcode);
1319 }
1320
1321 /* wake up the daemon & tell it to get us a spare table
1322 * XXX
1323 * the entries in the queues should be tagged with the raidPtr
1324 * so that in the extremely rare case that two recons happen at once,
1325 * we know for which device were requesting a spare table
1326 * XXX
1327 *
1328 * XXX This code is not currently used. GO
1329 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	/* Enqueue the request for the user-level sparetable daemon and
	 * wake it up, then block until a response appears on
	 * rf_sparet_resp_queue. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* NOTE(review): an earlier comment here claimed "mpsleep unlocks
	 * the mutex", but tsleep() does not release
	 * rf_sparet_wait_mutex for us -- we sleep while still holding it.
	 * The responder path (RAIDFRAME_SEND_SPARET) takes the same
	 * mutex, so this looks like a potential deadlock; confirm before
	 * relying on this (currently unused -- see the XXX above). */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* pop the response; this is a *different* RF_SparetWait_t than
	 * the one we enqueued above */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* the daemon passes the install status back in fcol */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1355
1356 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1357 * bp & passes it down.
1358 * any calls originating in the kernel must use non-blocking I/O
1359 * do some extra sanity checking to return "appropriate" error values for
1360 * certain conditions (to make some standard utilities work)
1361 *
1362 * Formerly known as: rf_DoAccessKernel
1363 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	struct buf *dp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Check to see if we're at the limit... raidPtr->openings is the
	 * number of concurrent accesses we may still start. */
	RF_LOCK_MUTEX(raidPtr->mutex);
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		dp = &rs->buf_queue;
		bp = dp->b_actf;
		if (bp == NULL) {
			/* nothing more to do; note the mutex was already
			 * dropped at the top of this iteration */
			return;
		}

		/* unlink bp from the doubly-linked buf queue headed at
		 * rs->buf_queue (b_actf = next, b_actb = pointer to the
		 * predecessor's next-pointer) */
		dp = bp->b_actf;
		if (dp != NULL) {
			dp->b_actb = bp->b_actb;
		} else {
			rs->buf_queue.b_actb = bp->b_actb;
		}
		*bp->b_actb = dp;

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/* range check; the "sum < x" comparisons also catch
		 * wraparound of the unsigned sector arithmetic above */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* re-take the mutex for the loop condition */
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that are not a whole number of sectors */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* consume one opening for this access */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */


		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
		    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
		    do_async, raid_addr, num_blocks,
		    bp->b_un.b_addr, bp, NULL, NULL,
		    RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);


		/* re-take the mutex for the loop condition */
		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1484
1485
1486
1487
1488 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1489
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;
	int s;

	s=0;
	/* s = splbio();*/ /* want to test this */
	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	/* remember which queue this request belongs to, for the
	 * completion side (KernelWakeupFunc) */
	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* grab a component buf header from the per-unit pool */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* complete immediately via the normal completion path */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* set up the component buf for the transfer; completion
		 * will call KernelWakeupFunc with req as context */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must bump the vnode's pending-output count */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	/* splx(s); */ /* want to test this */
	return (0);
}
1604 /* this is the callback function associated with a I/O invoked from
1605 kernel code.
1606 */
1607 static void
1608 KernelWakeupFunc(vbp)
1609 struct buf *vbp;
1610 {
1611 RF_DiskQueueData_t *req = NULL;
1612 RF_DiskQueue_t *queue;
1613 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1614 struct buf *bp;
1615 struct raid_softc *rs;
1616 int unit;
1617 register int s;
1618
1619 s = splbio();
1620 db1_printf(("recovering the request queue:\n"));
1621 req = raidbp->req;
1622
1623 bp = raidbp->rf_obp;
1624
1625 queue = (RF_DiskQueue_t *) req->queue;
1626
1627 if (raidbp->rf_buf.b_flags & B_ERROR) {
1628 bp->b_flags |= B_ERROR;
1629 bp->b_error = raidbp->rf_buf.b_error ?
1630 raidbp->rf_buf.b_error : EIO;
1631 }
1632
1633 /* XXX methinks this could be wrong... */
1634 #if 1
1635 bp->b_resid = raidbp->rf_buf.b_resid;
1636 #endif
1637
1638 if (req->tracerec) {
1639 RF_ETIMER_STOP(req->tracerec->timer);
1640 RF_ETIMER_EVAL(req->tracerec->timer);
1641 RF_LOCK_MUTEX(rf_tracing_mutex);
1642 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1643 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1644 req->tracerec->num_phys_ios++;
1645 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1646 }
1647 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1648
1649 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1650
1651
1652 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1653 * ballistic, and mark the component as hosed... */
1654
1655 if (bp->b_flags & B_ERROR) {
1656 /* Mark the disk as dead */
1657 /* but only mark it once... */
1658 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1659 rf_ds_optimal) {
1660 printf("raid%d: IO Error. Marking %s as failed.\n",
1661 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1662 queue->raidPtr->Disks[queue->row][queue->col].status =
1663 rf_ds_failed;
1664 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1665 queue->raidPtr->numFailures++;
1666 /* XXX here we should bump the version number for each component, and write that data out */
1667 } else { /* Disk is already dead... */
1668 /* printf("Disk already marked as dead!\n"); */
1669 }
1670
1671 }
1672
1673 rs = &raid_softc[unit];
1674 RAIDPUTBUF(rs, raidbp);
1675
1676
1677 if (bp->b_resid == 0) {
1678 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1679 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1680 }
1681
1682 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1683 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1684
1685 splx(s);
1686 }
1687
1688
1689
1690 /*
1691 * initialize a buf structure for doing an I/O in the kernel.
1692 */
1693 static void
1694 InitBP(
1695 struct buf * bp,
1696 struct vnode * b_vp,
1697 unsigned rw_flag,
1698 dev_t dev,
1699 RF_SectorNum_t startSect,
1700 RF_SectorCount_t numSect,
1701 caddr_t buf,
1702 void (*cbFunc) (struct buf *),
1703 void *cbArg,
1704 int logBytesPerSector,
1705 struct proc * b_proc)
1706 {
1707 /* bp->b_flags = B_PHYS | rw_flag; */
1708 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1709 bp->b_bcount = numSect << logBytesPerSector;
1710 bp->b_bufsize = bp->b_bcount;
1711 bp->b_error = 0;
1712 bp->b_dev = dev;
1713 bp->b_un.b_addr = buf;
1714 bp->b_blkno = startSect;
1715 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1716 if (bp->b_bcount == 0) {
1717 panic("bp->b_bcount is zero in InitBP!!\n");
1718 }
1719 bp->b_proc = b_proc;
1720 bp->b_iodone = cbFunc;
1721 bp->b_vp = b_vp;
1722
1723 }
1724
1725 static void
1726 raidgetdefaultlabel(raidPtr, rs, lp)
1727 RF_Raid_t *raidPtr;
1728 struct raid_softc *rs;
1729 struct disklabel *lp;
1730 {
1731 db1_printf(("Building a default label...\n"));
1732 bzero(lp, sizeof(*lp));
1733
1734 /* fabricate a label... */
1735 lp->d_secperunit = raidPtr->totalSectors;
1736 lp->d_secsize = raidPtr->bytesPerSector;
1737 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1738 lp->d_ntracks = 1;
1739 lp->d_ncylinders = raidPtr->totalSectors /
1740 (lp->d_nsectors * lp->d_ntracks);
1741 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1742
1743 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1744 lp->d_type = DTYPE_RAID;
1745 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1746 lp->d_rpm = 3600;
1747 lp->d_interleave = 1;
1748 lp->d_flags = 0;
1749
1750 lp->d_partitions[RAW_PART].p_offset = 0;
1751 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1752 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1753 lp->d_npartitions = RAW_PART + 1;
1754
1755 lp->d_magic = DISKMAGIC;
1756 lp->d_magic2 = DISKMAGIC;
1757 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1758
1759 }
1760 /*
1761 * Read the disklabel from the raid device. If one is not present, fake one
1762 * up.
1763 */
static void
raidgetdisklabel(dev)
	dev_t dev;
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a fabricated default, then overwrite it with the
	 * on-disk label if one can be read */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may be used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		/* warn about (but do not reject) partitions that extend
		 * past the end of the raid device */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
1817 /*
1818 * Take care of things one might want to take care of in the event
1819 * that a disklabel isn't present.
1820 */
1821 static void
1822 raidmakedisklabel(rs)
1823 struct raid_softc *rs;
1824 {
1825 struct disklabel *lp = rs->sc_dkdev.dk_label;
1826 db1_printf(("Making a label..\n"));
1827
1828 /*
1829 * For historical reasons, if there's no disklabel present
1830 * the raw partition must be marked FS_BSDFFS.
1831 */
1832
1833 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1834
1835 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1836
1837 lp->d_checksum = dkcksum(lp);
1838 }
1839 /*
1840 * Lookup the provided name in the filesystem. If the file exists,
1841 * is a valid block device, and isn't being used by anyone else,
1842 * set *vpp to the file's vnode.
1843 * You'll find the original of this in ccd.c
1844 */
1845 int
1846 raidlookup(path, p, vpp)
1847 char *path;
1848 struct proc *p;
1849 struct vnode **vpp; /* result */
1850 {
1851 struct nameidata nd;
1852 struct vnode *vp;
1853 struct vattr va;
1854 int error;
1855
1856 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1857 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1858 #ifdef DEBUG
1859 printf("RAIDframe: vn_open returned %d\n", error);
1860 #endif
1861 return (error);
1862 }
1863 vp = nd.ni_vp;
1864 if (vp->v_usecount > 1) {
1865 VOP_UNLOCK(vp, 0);
1866 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1867 return (EBUSY);
1868 }
1869 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1870 VOP_UNLOCK(vp, 0);
1871 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1872 return (error);
1873 }
1874 /* XXX: eventually we should handle VREG, too. */
1875 if (va.va_type != VBLK) {
1876 VOP_UNLOCK(vp, 0);
1877 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1878 return (ENOTBLK);
1879 }
1880 VOP_UNLOCK(vp, 0);
1881 *vpp = vp;
1882 return (0);
1883 }
1884 /*
1885 * Wait interruptibly for an exclusive lock.
1886 *
1887 * XXX
1888 * Several drivers do this; it should be abstracted and made MP-safe.
1889 * (Hmm... where have we seen this warning before :-> GO )
1890 */
1891 static int
1892 raidlock(rs)
1893 struct raid_softc *rs;
1894 {
1895 int error;
1896
1897 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1898 rs->sc_flags |= RAIDF_WANTED;
1899 if ((error =
1900 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1901 return (error);
1902 }
1903 rs->sc_flags |= RAIDF_LOCKED;
1904 return (0);
1905 }
1906 /*
1907 * Unlock and wake up any waiters.
1908 */
1909 static void
1910 raidunlock(rs)
1911 struct raid_softc *rs;
1912 {
1913
1914 rs->sc_flags &= ~RAIDF_LOCKED;
1915 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
1916 rs->sc_flags &= ~RAIDF_WANTED;
1917 wakeup(rs);
1918 }
1919 }
1920
1921
1922 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
1923 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
1924
1925 int
1926 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
1927 {
1928 RF_ComponentLabel_t component_label;
1929 raidread_component_label(dev, b_vp, &component_label);
1930 component_label.mod_counter = mod_counter;
1931 component_label.clean = RF_RAID_CLEAN;
1932 raidwrite_component_label(dev, b_vp, &component_label);
1933 return(0);
1934 }
1935
1936
1937 int
1938 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
1939 {
1940 RF_ComponentLabel_t component_label;
1941 raidread_component_label(dev, b_vp, &component_label);
1942 component_label.mod_counter = mod_counter;
1943 component_label.clean = RF_RAID_DIRTY;
1944 raidwrite_component_label(dev, b_vp, &component_label);
1945 return(0);
1946 }
1947
1948 /* ARGSUSED */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* Read the component label stored RF_COMPONENT_INFO_OFFSET bytes
	 * into the component device into *component_label.  Returns 0 or
	 * the errno from biowait().  b_vp is unused (ARGSUSED). */

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_resid is conventionally in bytes, but this sets
	 * a sector count (the write side does the same) -- confirm intent
	 * before changing either. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* submit directly to the component's block driver and wait */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* invalidate the buffer so stale label data is never cached */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
1998 /* ARGSUSED */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* Write *component_label to the label area of the component
	 * device (RF_COMPONENT_INFO_OFFSET bytes in).  Returns 0 or the
	 * errno from biowait().  b_vp is unused (ARGSUSED). */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	/* NOTE(review): sector count stored where byte count is expected;
	 * mirrors the read side -- confirm intent before changing. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero-pad the full label area so the tail is deterministic */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* submit directly to the component's block driver and wait */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2032
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	/* Bump the array's modification counter and mark the component
	 * label of every non-failed component dirty with that counter. */
	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
	/* NOTE(review): the disabled spare-handling chunk below uses
	 * variables (sparecol, srow, scol, i, j, r-as-spare-row) that are
	 * not declared in this function; it would not compile if enabled
	 * as-is.  Compare the live version in
	 * rf_update_component_labels(). */
#if 0
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
2127
2128
/*
 * rf_update_component_labels:
 *   Rewrite the on-disk component label of every in-service component,
 *   and of every in-use spare, so the labels reflect the array's current
 *   configuration.  If parity is known good, additionally mark each
 *   component clean (via raidmarkclean) with the new mod counter.
 */
void
rf_update_component_labels( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;	/* scratch label, reused per component */
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;		/* row/col of the disk a spare replaced */

	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	/* Bump the generation count so labels written below supersede
	   any older copies on disk. */
	raidPtr->mod_counter++;

	/* Pass 1: in-service (rf_ds_optimal) components. */
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				/* NOTE(review): unlike the spare loop below,
				   this path updates only c_label.status; the
				   label's own mod_counter etc. are left as
				   read from disk — confirm that asymmetry is
				   intentional. */
				c_label.status = rf_ds_optimal;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					/* parity verified good: also set the
					   clean bit, stamped with the new
					   mod counter */
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
			/* else we don't touch it.. */
#if 0
			else if (raidPtr->Disks[r][c].status !=
				 rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
#endif
		}
	}

	/* Pass 2: in-use spares.  Spares are addressed as row 0,
	   columns numCol..numCol+numSpare-1. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which disk this spare stands in for.
			   NOTE(review): only spareRow == 0 is matched, and
			   the break exits just the inner loop — the outer
			   loop keeps scanning.  Harmless if at most one
			   disk references this spare; confirm.  Also note
			   srow/scol stay -1 if no match is found. */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}

			raidread_component_label(
				raidPtr->Disks[0][sparecol].dev,
				raidPtr->raid_cinfo[0][sparecol].ci_vp,
				&c_label);
			/* make sure status is noted */
			/* Rewrite the full label so the spare identifies
			   itself as the component it replaced. */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				raidPtr->Disks[0][sparecol].dev,
				raidPtr->raid_cinfo[0][sparecol].ci_vp,
				&c_label);
			if (raidPtr->parity_good == RF_RAID_CLEAN) {
				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
					       raidPtr->raid_cinfo[0][sparecol].ci_vp,
					       raidPtr->mod_counter);
			}
		}
	}
	/* printf("Component labels updated\n"); */
}
2245
2246 void
2247 rf_ReconThread(req)
2248 struct rf_recon_req *req;
2249 {
2250 int s;
2251 RF_Raid_t *raidPtr;
2252
2253 s = splbio();
2254 raidPtr = (RF_Raid_t *) req->raidPtr;
2255 raidPtr->recon_in_progress = 1;
2256
2257 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2258 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2259
2260 /* XXX get rid of this! we don't need it at all.. */
2261 RF_Free(req, sizeof(*req));
2262
2263 raidPtr->recon_in_progress = 0;
2264 splx(s);
2265
2266 /* That's all... */
2267 kthread_exit(0); /* does not return */
2268 }
2269
2270 void
2271 rf_RewriteParityThread(raidPtr)
2272 RF_Raid_t *raidPtr;
2273 {
2274 int retcode;
2275 int s;
2276
2277 raidPtr->parity_rewrite_in_progress = 1;
2278 s = splbio();
2279 retcode = rf_RewriteParity(raidPtr);
2280 splx(s);
2281 if (retcode) {
2282 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2283 } else {
2284 /* set the clean bit! If we shutdown correctly,
2285 the clean bit on each component label will get
2286 set */
2287 raidPtr->parity_good = RF_RAID_CLEAN;
2288 }
2289 raidPtr->parity_rewrite_in_progress = 0;
2290
2291 /* That's all... */
2292 kthread_exit(0); /* does not return */
2293 }
2294
2295
2296 void
2297 rf_CopybackThread(raidPtr)
2298 RF_Raid_t *raidPtr;
2299 {
2300 int s;
2301
2302 raidPtr->copyback_in_progress = 1;
2303 s = splbio();
2304 rf_CopybackReconstructedData(raidPtr);
2305 splx(s);
2306 raidPtr->copyback_in_progress = 0;
2307
2308 /* That's all... */
2309 kthread_exit(0); /* does not return */
2310 }
2311
2312
2313 void
2314 rf_ReconstructInPlaceThread(req)
2315 struct rf_recon_req *req;
2316 {
2317 int retcode;
2318 int s;
2319 RF_Raid_t *raidPtr;
2320
2321 s = splbio();
2322 raidPtr = req->raidPtr;
2323 raidPtr->recon_in_progress = 1;
2324 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2325 RF_Free(req, sizeof(*req));
2326 raidPtr->recon_in_progress = 0;
2327 splx(s);
2328
2329 /* That's all... */
2330 kthread_exit(0); /* does not return */
2331 }
2332