rf_netbsdkintf.c revision 1.37 1 /* $NetBSD: rf_netbsdkintf.c,v 1.37 2000/01/05 02:57:29 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
154 int rf_kdebug_level = 0;
155
156 #ifdef DEBUG
157 #define db0_printf(a) printf a
158 #define db_printf(a) if (rf_kdebug_level > 0) printf a
159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
160 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
161 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
162 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
163 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
164 #else /* DEBUG */
165 #define db0_printf(a) printf a
166 #define db1_printf(a) { }
167 #define db2_printf(a) { }
168 #define db3_printf(a) { }
169 #define db4_printf(a) { }
170 #define db5_printf(a) { }
171 #endif /* DEBUG */
172
173 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
174
175 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
176
177 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
178 * spare table */
179 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
180 * installation process */
181
182 /* prototypes */
183 static void KernelWakeupFunc(struct buf * bp);
184 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
185 dev_t dev, RF_SectorNum_t startSect,
186 RF_SectorCount_t numSect, caddr_t buf,
187 void (*cbFunc) (struct buf *), void *cbArg,
188 int logBytesPerSector, struct proc * b_proc);
189
190 #define Dprintf0(s) if (rf_queueDebug) \
191 rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
192 #define Dprintf1(s,a) if (rf_queueDebug) \
193 rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
194 #define Dprintf2(s,a,b) if (rf_queueDebug) \
195 rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
196 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
197 rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
198
199 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
200 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
201
202 void raidattach __P((int));
203 int raidsize __P((dev_t));
204
205 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
206 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
207 static int raidinit __P((dev_t, RF_Raid_t *, int));
208
209 int raidopen __P((dev_t, int, int, struct proc *));
210 int raidclose __P((dev_t, int, int, struct proc *));
211 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
212 int raidwrite __P((dev_t, struct uio *, int));
213 int raidread __P((dev_t, struct uio *, int));
214 void raidstrategy __P((struct buf *));
215 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
216
217 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
218 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
219 void rf_update_component_labels( RF_Raid_t *);
220 /*
221 * Pilfered from ccd.c
222 */
223
/*
 * Per-component-I/O wrapper buf, allocated from the unit's
 * sc_cbufpool (see RAIDGETBUF/RAIDPUTBUF) for each low-level I/O
 * issued on behalf of an original buf.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! (so a
				 * struct raidbuf * may be used where a
				 * struct buf * is expected) */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int rf_flags;		/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
230
231
232 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
233 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
234
235 /* XXX Not sure if the following should be replacing the raidPtrs above,
236 or if it should be used in conjunction with that... */
237
/*
 * Per-unit software state for a RAID pseudo-device; one entry per
 * configured unit in the raid_softc[] array below.
 */
struct raid_softc {
	int sc_flags;		/* flags (RAIDF_* bits defined below) */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	dev_t sc_dev;		/* our device.. */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf buf_queue;	/* used for the device queue */
};
248 /* sc_flags */
249 #define RAIDF_INITED 0x01 /* unit has been initialized */
250 #define RAIDF_WLABEL 0x02 /* label area is writable */
251 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
252 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
253 #define RAIDF_LOCKED 0x80 /* unit is locked */
254
255 #define raidunit(x) DISKUNIT(x)
256 static int numraid = 0;
257
258 /*
259 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
260 * Be aware that large numbers can allow the driver to consume a lot of
261 * kernel memory, especially on writes, and in degraded mode reads.
262 *
263 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
264 * a single 64K write will typically require 64K for the old data,
265 * 64K for the old parity, and 64K for the new parity, for a total
266 * of 192K (if the parity buffer is not re-used immediately).
267 * Even it if is used immedately, that's still 128K, which when multiplied
268 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
269 *
270 * Now in degraded mode, for example, a 64K read on the above setup may
271 * require data reconstruction, which will require *all* of the 4 remaining
272 * disks to participate -- 4 * 32K/disk == 128K again.
273 */
274
275 #ifndef RAIDOUTSTANDING
276 #define RAIDOUTSTANDING 6
277 #endif
278
279 #define RAIDLABELDEV(dev) \
280 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
281
282 /* declared here, and made public, for the benefit of KVM stuff.. */
283 struct raid_softc *raid_softc;
284
285 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
286 struct disklabel *));
287 static void raidgetdisklabel __P((dev_t));
288 static void raidmakedisklabel __P((struct raid_softc *));
289
290 static int raidlock __P((struct raid_softc *));
291 static void raidunlock __P((struct raid_softc *));
292 int raidlookup __P((char *, struct proc * p, struct vnode **));
293
294 static void rf_markalldirty __P((RF_Raid_t *));
295
296 void rf_ReconThread __P((struct rf_recon_req *));
297 /* XXX what I want is: */
298 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
299 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
300 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
301 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
302
303 void
304 raidattach(num)
305 int num;
306 {
307 int raidID;
308 int i, rc;
309
310 #ifdef DEBUG
311 printf("raidattach: Asked for %d units\n", num);
312 #endif
313
314 if (num <= 0) {
315 #ifdef DIAGNOSTIC
316 panic("raidattach: count <= 0");
317 #endif
318 return;
319 }
320 /* This is where all the initialization stuff gets done. */
321
322 /* Make some space for requested number of units... */
323
324 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
325 if (raidPtrs == NULL) {
326 panic("raidPtrs is NULL!!\n");
327 }
328
329 rc = rf_mutex_init(&rf_sparet_wait_mutex);
330 if (rc) {
331 RF_PANIC();
332 }
333
334 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
335
336 for (i = 0; i < numraid; i++)
337 raidPtrs[i] = NULL;
338 rc = rf_BootRaidframe();
339 if (rc == 0)
340 printf("Kernelized RAIDframe activated\n");
341 else
342 panic("Serious error booting RAID!!\n");
343
344 /* put together some datastructures like the CCD device does.. This
345 * lets us lock the device and what-not when it gets opened. */
346
347 raid_softc = (struct raid_softc *)
348 malloc(num * sizeof(struct raid_softc),
349 M_RAIDFRAME, M_NOWAIT);
350 if (raid_softc == NULL) {
351 printf("WARNING: no memory for RAIDframe driver\n");
352 return;
353 }
354 numraid = num;
355 bzero(raid_softc, num * sizeof(struct raid_softc));
356
357 for (raidID = 0; raidID < num; raidID++) {
358 raid_softc[raidID].buf_queue.b_actf = NULL;
359 raid_softc[raidID].buf_queue.b_actb =
360 &raid_softc[raidID].buf_queue.b_actf;
361 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
362 (RF_Raid_t *));
363 if (raidPtrs[raidID] == NULL) {
364 printf("raidPtrs[%d] is NULL\n", raidID);
365 }
366 }
367 }
368
369
370 int
371 raidsize(dev)
372 dev_t dev;
373 {
374 struct raid_softc *rs;
375 struct disklabel *lp;
376 int part, unit, omask, size;
377
378 unit = raidunit(dev);
379 if (unit >= numraid)
380 return (-1);
381 rs = &raid_softc[unit];
382
383 if ((rs->sc_flags & RAIDF_INITED) == 0)
384 return (-1);
385
386 part = DISKPART(dev);
387 omask = rs->sc_dkdev.dk_openmask & (1 << part);
388 lp = rs->sc_dkdev.dk_label;
389
390 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
391 return (-1);
392
393 if (lp->d_partitions[part].p_fstype != FS_SWAP)
394 size = -1;
395 else
396 size = lp->d_partitions[part].p_size *
397 (lp->d_secsize / DEV_BSIZE);
398
399 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
400 return (-1);
401
402 return (size);
403
404 }
405
406 int
407 raiddump(dev, blkno, va, size)
408 dev_t dev;
409 daddr_t blkno;
410 caddr_t va;
411 size_t size;
412 {
413 /* Not implemented. */
414 return ENXIO;
415 }
416 /* ARGSUSED */
417 int
418 raidopen(dev, flags, fmt, p)
419 dev_t dev;
420 int flags, fmt;
421 struct proc *p;
422 {
423 int unit = raidunit(dev);
424 struct raid_softc *rs;
425 struct disklabel *lp;
426 int part, pmask;
427 int error = 0;
428
429 if (unit >= numraid)
430 return (ENXIO);
431 rs = &raid_softc[unit];
432
433 if ((error = raidlock(rs)) != 0)
434 return (error);
435 lp = rs->sc_dkdev.dk_label;
436
437 part = DISKPART(dev);
438 pmask = (1 << part);
439
440 db1_printf(("Opening raid device number: %d partition: %d\n",
441 unit, part));
442
443
444 if ((rs->sc_flags & RAIDF_INITED) &&
445 (rs->sc_dkdev.dk_openmask == 0))
446 raidgetdisklabel(dev);
447
448 /* make sure that this partition exists */
449
450 if (part != RAW_PART) {
451 db1_printf(("Not a raw partition..\n"));
452 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
453 ((part >= lp->d_npartitions) ||
454 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
455 error = ENXIO;
456 raidunlock(rs);
457 db1_printf(("Bailing out...\n"));
458 return (error);
459 }
460 }
461 /* Prevent this unit from being unconfigured while open. */
462 switch (fmt) {
463 case S_IFCHR:
464 rs->sc_dkdev.dk_copenmask |= pmask;
465 break;
466
467 case S_IFBLK:
468 rs->sc_dkdev.dk_bopenmask |= pmask;
469 break;
470 }
471
472 if ((rs->sc_dkdev.dk_openmask == 0) &&
473 ((rs->sc_flags & RAIDF_INITED) != 0)) {
474 /* First one... mark things as dirty... Note that we *MUST*
475 have done a configure before this. I DO NOT WANT TO BE
476 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
477 THAT THEY BELONG TOGETHER!!!!! */
478 /* XXX should check to see if we're only open for reading
479 here... If so, we needn't do this, but then need some
480 other way of keeping track of what's happened.. */
481
482 rf_markalldirty( raidPtrs[unit] );
483 }
484
485
486 rs->sc_dkdev.dk_openmask =
487 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
488
489 raidunlock(rs);
490
491 return (error);
492
493
494 }
495 /* ARGSUSED */
496 int
497 raidclose(dev, flags, fmt, p)
498 dev_t dev;
499 int flags, fmt;
500 struct proc *p;
501 {
502 int unit = raidunit(dev);
503 struct raid_softc *rs;
504 int error = 0;
505 int part;
506
507 if (unit >= numraid)
508 return (ENXIO);
509 rs = &raid_softc[unit];
510
511 if ((error = raidlock(rs)) != 0)
512 return (error);
513
514 part = DISKPART(dev);
515
516 /* ...that much closer to allowing unconfiguration... */
517 switch (fmt) {
518 case S_IFCHR:
519 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
520 break;
521
522 case S_IFBLK:
523 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
524 break;
525 }
526 rs->sc_dkdev.dk_openmask =
527 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
528
529 if ((rs->sc_dkdev.dk_openmask == 0) &&
530 ((rs->sc_flags & RAIDF_INITED) != 0)) {
531 /* Last one... device is not unconfigured yet.
532 Device shutdown has taken care of setting the
533 clean bits if RAIDF_INITED is not set
534 mark things as clean... */
535 rf_update_component_labels( raidPtrs[unit] );
536 }
537
538 raidunlock(rs);
539 return (0);
540
541 }
542
543 void
544 raidstrategy(bp)
545 register struct buf *bp;
546 {
547 register int s;
548
549 unsigned int raidID = raidunit(bp->b_dev);
550 RF_Raid_t *raidPtr;
551 struct raid_softc *rs = &raid_softc[raidID];
552 struct disklabel *lp;
553 struct buf *dp;
554 int wlabel;
555
556 if ((rs->sc_flags & RAIDF_INITED) ==0) {
557 bp->b_error = ENXIO;
558 bp->b_flags = B_ERROR;
559 bp->b_resid = bp->b_bcount;
560 biodone(bp);
561 return;
562 }
563 if (raidID >= numraid || !raidPtrs[raidID]) {
564 bp->b_error = ENODEV;
565 bp->b_flags |= B_ERROR;
566 bp->b_resid = bp->b_bcount;
567 biodone(bp);
568 return;
569 }
570 raidPtr = raidPtrs[raidID];
571 if (!raidPtr->valid) {
572 bp->b_error = ENODEV;
573 bp->b_flags |= B_ERROR;
574 bp->b_resid = bp->b_bcount;
575 biodone(bp);
576 return;
577 }
578 if (bp->b_bcount == 0) {
579 db1_printf(("b_bcount is zero..\n"));
580 biodone(bp);
581 return;
582 }
583 lp = rs->sc_dkdev.dk_label;
584
585 /*
586 * Do bounds checking and adjust transfer. If there's an
587 * error, the bounds check will flag that for us.
588 */
589
590 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
591 if (DISKPART(bp->b_dev) != RAW_PART)
592 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
593 db1_printf(("Bounds check failed!!:%d %d\n",
594 (int) bp->b_blkno, (int) wlabel));
595 biodone(bp);
596 return;
597 }
598 s = splbio();
599
600 bp->b_resid = 0;
601
602 /* stuff it onto our queue */
603
604 dp = &rs->buf_queue;
605 bp->b_actf = NULL;
606 bp->b_actb = dp->b_actb;
607 *dp->b_actb = bp;
608 dp->b_actb = &bp->b_actf;
609
610 raidstart(raidPtrs[raidID]);
611
612 splx(s);
613 }
614 /* ARGSUSED */
615 int
616 raidread(dev, uio, flags)
617 dev_t dev;
618 struct uio *uio;
619 int flags;
620 {
621 int unit = raidunit(dev);
622 struct raid_softc *rs;
623 int part;
624
625 if (unit >= numraid)
626 return (ENXIO);
627 rs = &raid_softc[unit];
628
629 if ((rs->sc_flags & RAIDF_INITED) == 0)
630 return (ENXIO);
631 part = DISKPART(dev);
632
633 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
634
635 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
636
637 }
638 /* ARGSUSED */
639 int
640 raidwrite(dev, uio, flags)
641 dev_t dev;
642 struct uio *uio;
643 int flags;
644 {
645 int unit = raidunit(dev);
646 struct raid_softc *rs;
647
648 if (unit >= numraid)
649 return (ENXIO);
650 rs = &raid_softc[unit];
651
652 if ((rs->sc_flags & RAIDF_INITED) == 0)
653 return (ENXIO);
654 db1_printf(("raidwrite\n"));
655 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
656
657 }
658
659 int
660 raidioctl(dev, cmd, data, flag, p)
661 dev_t dev;
662 u_long cmd;
663 caddr_t data;
664 int flag;
665 struct proc *p;
666 {
667 int unit = raidunit(dev);
668 int error = 0;
669 int part, pmask;
670 struct raid_softc *rs;
671 RF_Config_t *k_cfg, *u_cfg;
672 u_char *specific_buf;
673 int retcode = 0;
674 int row;
675 int column;
676 struct rf_recon_req *rrcopy, *rr;
677 RF_ComponentLabel_t *component_label;
678 RF_ComponentLabel_t ci_label;
679 RF_ComponentLabel_t **c_label_ptr;
680 RF_SingleComponent_t *sparePtr,*componentPtr;
681 RF_SingleComponent_t hot_spare;
682 RF_SingleComponent_t component;
683
684 if (unit >= numraid)
685 return (ENXIO);
686 rs = &raid_softc[unit];
687
688 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
689 (int) DISKPART(dev), (int) unit, (int) cmd));
690
691 /* Must be open for writes for these commands... */
692 switch (cmd) {
693 case DIOCSDINFO:
694 case DIOCWDINFO:
695 case DIOCWLABEL:
696 if ((flag & FWRITE) == 0)
697 return (EBADF);
698 }
699
700 /* Must be initialized for these... */
701 switch (cmd) {
702 case DIOCGDINFO:
703 case DIOCSDINFO:
704 case DIOCWDINFO:
705 case DIOCGPART:
706 case DIOCWLABEL:
707 case DIOCGDEFLABEL:
708 case RAIDFRAME_SHUTDOWN:
709 case RAIDFRAME_REWRITEPARITY:
710 case RAIDFRAME_GET_INFO:
711 case RAIDFRAME_RESET_ACCTOTALS:
712 case RAIDFRAME_GET_ACCTOTALS:
713 case RAIDFRAME_KEEP_ACCTOTALS:
714 case RAIDFRAME_GET_SIZE:
715 case RAIDFRAME_FAIL_DISK:
716 case RAIDFRAME_COPYBACK:
717 case RAIDFRAME_CHECK_RECON_STATUS:
718 case RAIDFRAME_GET_COMPONENT_LABEL:
719 case RAIDFRAME_SET_COMPONENT_LABEL:
720 case RAIDFRAME_ADD_HOT_SPARE:
721 case RAIDFRAME_REMOVE_HOT_SPARE:
722 case RAIDFRAME_INIT_LABELS:
723 case RAIDFRAME_REBUILD_IN_PLACE:
724 case RAIDFRAME_CHECK_PARITY:
725 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
726 case RAIDFRAME_CHECK_COPYBACK_STATUS:
727 if ((rs->sc_flags & RAIDF_INITED) == 0)
728 return (ENXIO);
729 }
730
731 switch (cmd) {
732
733
734 /* configure the system */
735 case RAIDFRAME_CONFIGURE:
736
737 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
738 /* copy-in the configuration information */
739 /* data points to a pointer to the configuration structure */
740 u_cfg = *((RF_Config_t **) data);
741 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
742 if (k_cfg == NULL) {
743 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
744 return (ENOMEM);
745 }
746 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
747 sizeof(RF_Config_t));
748 if (retcode) {
749 RF_Free(k_cfg, sizeof(RF_Config_t));
750 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
751 retcode));
752 return (retcode);
753 }
754 /* allocate a buffer for the layout-specific data, and copy it
755 * in */
756 if (k_cfg->layoutSpecificSize) {
757 if (k_cfg->layoutSpecificSize > 10000) {
758 /* sanity check */
759 RF_Free(k_cfg, sizeof(RF_Config_t));
760 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
761 return (EINVAL);
762 }
763 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
764 (u_char *));
765 if (specific_buf == NULL) {
766 RF_Free(k_cfg, sizeof(RF_Config_t));
767 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
768 return (ENOMEM);
769 }
770 retcode = copyin(k_cfg->layoutSpecific,
771 (caddr_t) specific_buf,
772 k_cfg->layoutSpecificSize);
773 if (retcode) {
774 RF_Free(k_cfg, sizeof(RF_Config_t));
775 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
776 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
777 retcode));
778 return (retcode);
779 }
780 } else
781 specific_buf = NULL;
782 k_cfg->layoutSpecific = specific_buf;
783
784 /* should do some kind of sanity check on the configuration.
785 * Store the sum of all the bytes in the last byte? */
786
787 /* configure the system */
788
789 raidPtrs[unit]->raidid = unit;
790
791 retcode = rf_Configure(raidPtrs[unit], k_cfg);
792
793 /* allow this many simultaneous IO's to this RAID device */
794 raidPtrs[unit]->openings = RAIDOUTSTANDING;
795
796 /* XXX should be moved to rf_Configure() */
797
798 raidPtrs[unit]->copyback_in_progress = 0;
799 raidPtrs[unit]->parity_rewrite_in_progress = 0;
800 raidPtrs[unit]->recon_in_progress = 0;
801
802 if (retcode == 0) {
803 retcode = raidinit(dev, raidPtrs[unit], unit);
804 rf_markalldirty( raidPtrs[unit] );
805 }
806 /* free the buffers. No return code here. */
807 if (k_cfg->layoutSpecificSize) {
808 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
809 }
810 RF_Free(k_cfg, sizeof(RF_Config_t));
811
812 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
813 retcode));
814
815 return (retcode);
816
817 /* shutdown the system */
818 case RAIDFRAME_SHUTDOWN:
819
820 if ((error = raidlock(rs)) != 0)
821 return (error);
822
823 /*
824 * If somebody has a partition mounted, we shouldn't
825 * shutdown.
826 */
827
828 part = DISKPART(dev);
829 pmask = (1 << part);
830 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
831 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
832 (rs->sc_dkdev.dk_copenmask & pmask))) {
833 raidunlock(rs);
834 return (EBUSY);
835 }
836
837 if (rf_debugKernelAccess) {
838 printf("call shutdown\n");
839 }
840
841 retcode = rf_Shutdown(raidPtrs[unit]);
842
843 db1_printf(("Done main shutdown\n"));
844
845 pool_destroy(&rs->sc_cbufpool);
846 db1_printf(("Done freeing component buffer freelist\n"));
847
848 /* It's no longer initialized... */
849 rs->sc_flags &= ~RAIDF_INITED;
850
851 /* Detach the disk. */
852 disk_detach(&rs->sc_dkdev);
853
854 raidunlock(rs);
855
856 return (retcode);
857 case RAIDFRAME_GET_COMPONENT_LABEL:
858 c_label_ptr = (RF_ComponentLabel_t **) data;
859 /* need to read the component label for the disk indicated
860 by row,column in component_label
861 XXX need to sanity check these values!!!
862 */
863
864 /* For practice, let's get it directly fromdisk, rather
865 than from the in-core copy */
866 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
867 (RF_ComponentLabel_t *));
868 if (component_label == NULL)
869 return (ENOMEM);
870
871 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
872
873 retcode = copyin( *c_label_ptr, component_label,
874 sizeof(RF_ComponentLabel_t));
875
876 if (retcode) {
877 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
878 return(retcode);
879 }
880
881 row = component_label->row;
882 column = component_label->column;
883
884 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
885 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
886 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
887 return(EINVAL);
888 }
889
890 raidread_component_label(
891 raidPtrs[unit]->Disks[row][column].dev,
892 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
893 component_label );
894
895 retcode = copyout((caddr_t) component_label,
896 (caddr_t) *c_label_ptr,
897 sizeof(RF_ComponentLabel_t));
898 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
899 return (retcode);
900
901 case RAIDFRAME_SET_COMPONENT_LABEL:
902 component_label = (RF_ComponentLabel_t *) data;
903
904 /* XXX check the label for valid stuff... */
905 /* Note that some things *should not* get modified --
906 the user should be re-initing the labels instead of
907 trying to patch things.
908 */
909
910 printf("Got component label:\n");
911 printf("Version: %d\n",component_label->version);
912 printf("Serial Number: %d\n",component_label->serial_number);
913 printf("Mod counter: %d\n",component_label->mod_counter);
914 printf("Row: %d\n", component_label->row);
915 printf("Column: %d\n", component_label->column);
916 printf("Num Rows: %d\n", component_label->num_rows);
917 printf("Num Columns: %d\n", component_label->num_columns);
918 printf("Clean: %d\n", component_label->clean);
919 printf("Status: %d\n", component_label->status);
920
921 row = component_label->row;
922 column = component_label->column;
923
924 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
925 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
926 return(EINVAL);
927 }
928
929 /* XXX this isn't allowed to do anything for now :-) */
930 #if 0
931 raidwrite_component_label(
932 raidPtrs[unit]->Disks[row][column].dev,
933 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
934 component_label );
935 #endif
936 return (0);
937
938 case RAIDFRAME_INIT_LABELS:
939 component_label = (RF_ComponentLabel_t *) data;
940 /*
941 we only want the serial number from
942 the above. We get all the rest of the information
943 from the config that was used to create this RAID
944 set.
945 */
946
947 raidPtrs[unit]->serial_number = component_label->serial_number;
948 /* current version number */
949 ci_label.version = RF_COMPONENT_LABEL_VERSION;
950 ci_label.serial_number = component_label->serial_number;
951 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
952 ci_label.num_rows = raidPtrs[unit]->numRow;
953 ci_label.num_columns = raidPtrs[unit]->numCol;
954 ci_label.clean = RF_RAID_DIRTY; /* not clean */
955 ci_label.status = rf_ds_optimal; /* "It's good!" */
956
957 for(row=0;row<raidPtrs[unit]->numRow;row++) {
958 ci_label.row = row;
959 for(column=0;column<raidPtrs[unit]->numCol;column++) {
960 ci_label.column = column;
961 raidwrite_component_label(
962 raidPtrs[unit]->Disks[row][column].dev,
963 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
964 &ci_label );
965 }
966 }
967
968 return (retcode);
969
970 /* initialize all parity */
971 case RAIDFRAME_REWRITEPARITY:
972
973 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
974 /* Parity for RAID 0 is trivially correct */
975 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
976 return(0);
977 }
978
979 if (raidPtrs[unit]->parity_rewrite_in_progress == 1) {
980 /* Re-write is already in progress! */
981 return(EINVAL);
982 }
983
984 /* borrow the thread of the requesting process */
985
986 retcode = RF_CREATE_THREAD(raidPtrs[unit]->parity_rewrite_thread,
987 rf_RewriteParityThread,
988 raidPtrs[unit],"raid_parity");
989 return (retcode);
990
991
992 case RAIDFRAME_ADD_HOT_SPARE:
993 sparePtr = (RF_SingleComponent_t *) data;
994 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
995 printf("Adding spare\n");
996 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
997 return(retcode);
998
999 case RAIDFRAME_REMOVE_HOT_SPARE:
1000 return(retcode);
1001
1002 case RAIDFRAME_REBUILD_IN_PLACE:
1003
1004 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1005 /* Can't do this on a RAID 0!! */
1006 return(EINVAL);
1007 }
1008
1009 if (raidPtrs[unit]->recon_in_progress == 1) {
1010 /* a reconstruct is already in progress! */
1011 return(EINVAL);
1012 }
1013
1014 componentPtr = (RF_SingleComponent_t *) data;
1015 memcpy( &component, componentPtr,
1016 sizeof(RF_SingleComponent_t));
1017 row = component.row;
1018 column = component.column;
1019 printf("Rebuild: %d %d\n",row, column);
1020 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
1021 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
1022 return(EINVAL);
1023 }
1024
1025 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1026
1027 rrcopy->raidPtr = (void *) raidPtrs[unit];
1028 rrcopy->row = row;
1029 rrcopy->col = column;
1030
1031 retcode = RF_CREATE_THREAD(raidPtrs[unit]->recon_thread,
1032 rf_ReconstructInPlaceThread,
1033 rrcopy,"raid_reconip");
1034 return(retcode);
1035
1036 case RAIDFRAME_GET_INFO:
1037 {
1038 RF_Raid_t *raid = raidPtrs[unit];
1039 RF_DeviceConfig_t *cfg, **ucfgp;
1040 int i, j, d;
1041
1042 if (!raid->valid)
1043 return (ENODEV);
1044 ucfgp = (RF_DeviceConfig_t **) data;
1045 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1046 (RF_DeviceConfig_t *));
1047 if (cfg == NULL)
1048 return (ENOMEM);
1049 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1050 cfg->rows = raid->numRow;
1051 cfg->cols = raid->numCol;
1052 cfg->ndevs = raid->numRow * raid->numCol;
1053 if (cfg->ndevs >= RF_MAX_DISKS) {
1054 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1055 return (ENOMEM);
1056 }
1057 cfg->nspares = raid->numSpare;
1058 if (cfg->nspares >= RF_MAX_DISKS) {
1059 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1060 return (ENOMEM);
1061 }
1062 cfg->maxqdepth = raid->maxQueueDepth;
1063 d = 0;
1064 for (i = 0; i < cfg->rows; i++) {
1065 for (j = 0; j < cfg->cols; j++) {
1066 cfg->devs[d] = raid->Disks[i][j];
1067 d++;
1068 }
1069 }
1070 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1071 cfg->spares[i] = raid->Disks[0][j];
1072 }
1073 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1074 sizeof(RF_DeviceConfig_t));
1075 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1076
1077 return (retcode);
1078 }
1079 break;
1080 case RAIDFRAME_CHECK_PARITY:
1081 *(int *) data = raidPtrs[unit]->parity_good;
1082 return (0);
1083 case RAIDFRAME_RESET_ACCTOTALS:
1084 {
1085 RF_Raid_t *raid = raidPtrs[unit];
1086
1087 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1088 return (0);
1089 }
1090 break;
1091
1092 case RAIDFRAME_GET_ACCTOTALS:
1093 {
1094 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1095 RF_Raid_t *raid = raidPtrs[unit];
1096
1097 *totals = raid->acc_totals;
1098 return (0);
1099 }
1100 break;
1101
1102 case RAIDFRAME_KEEP_ACCTOTALS:
1103 {
1104 RF_Raid_t *raid = raidPtrs[unit];
1105 int *keep = (int *) data;
1106
1107 raid->keep_acc_totals = *keep;
1108 return (0);
1109 }
1110 break;
1111
1112 case RAIDFRAME_GET_SIZE:
1113 *(int *) data = raidPtrs[unit]->totalSectors;
1114 return (0);
1115
1116 /* fail a disk & optionally start reconstruction */
1117 case RAIDFRAME_FAIL_DISK:
1118
1119 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1120 /* Can't do this on a RAID 0!! */
1121 return(EINVAL);
1122 }
1123
1124 rr = (struct rf_recon_req *) data;
1125
1126 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1127 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1128 return (EINVAL);
1129
1130 printf("raid%d: Failing the disk: row: %d col: %d\n",
1131 unit, rr->row, rr->col);
1132
1133 /* make a copy of the recon request so that we don't rely on
1134 * the user's buffer */
1135 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1136 bcopy(rr, rrcopy, sizeof(*rr));
1137 rrcopy->raidPtr = (void *) raidPtrs[unit];
1138
1139 retcode = RF_CREATE_THREAD(raidPtrs[unit]->recon_thread,
1140 rf_ReconThread,
1141 rrcopy,"raid_recon");
1142 return (0);
1143
1144 /* invoke a copyback operation after recon on whatever disk
1145 * needs it, if any */
1146 case RAIDFRAME_COPYBACK:
1147
1148 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1149 /* This makes no sense on a RAID 0!! */
1150 return(EINVAL);
1151 }
1152
1153 if (raidPtrs[unit]->copyback_in_progress == 1) {
1154 /* Copyback is already in progress! */
1155 return(EINVAL);
1156 }
1157
1158 retcode = RF_CREATE_THREAD(raidPtrs[unit]->copyback_thread,
1159 rf_CopybackThread,
1160 raidPtrs[unit],"raid_copyback");
1161 return (retcode);
1162
1163 /* return the percentage completion of reconstruction */
1164 case RAIDFRAME_CHECK_RECON_STATUS:
1165 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1166 /* This makes no sense on a RAID 0 */
1167 return(EINVAL);
1168 }
1169 row = 0; /* XXX we only consider a single row... */
1170 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1171 *(int *) data = 100;
1172 else
1173 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1174 return (0);
1175
1176 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1177 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1178 /* This makes no sense on a RAID 0 */
1179 return(EINVAL);
1180 }
1181 if (raidPtrs[unit]->parity_rewrite_in_progress == 1) {
1182 *(int *) data = 100 * raidPtrs[unit]->parity_rewrite_stripes_done / raidPtrs[unit]->Layout.numStripe;
1183 } else {
1184 *(int *) data = 100;
1185 }
1186 return (0);
1187
1188 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1189 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1190 /* This makes no sense on a RAID 0 */
1191 return(EINVAL);
1192 }
1193 if (raidPtrs[unit]->copyback_in_progress == 1) {
1194 *(int *) data = 100 * raidPtrs[unit]->copyback_stripes_done / raidPtrs[unit]->Layout.numStripe;
1195 } else {
1196 *(int *) data = 100;
1197 }
1198 return (0);
1199
1200
1201 /* the sparetable daemon calls this to wait for the kernel to
1202 * need a spare table. this ioctl does not return until a
1203 * spare table is needed. XXX -- calling mpsleep here in the
1204 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1205 * -- I should either compute the spare table in the kernel,
1206 * or have a different -- XXX XXX -- interface (a different
1207 * character device) for delivering the table -- XXX */
1208 #if 0
1209 case RAIDFRAME_SPARET_WAIT:
1210 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1211 while (!rf_sparet_wait_queue)
1212 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1213 waitreq = rf_sparet_wait_queue;
1214 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1215 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1216
1217 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1218
1219 RF_Free(waitreq, sizeof(*waitreq));
1220 return (0);
1221
1222
1223 /* wakes up a process waiting on SPARET_WAIT and puts an error
1224 * code in it that will cause the dameon to exit */
1225 case RAIDFRAME_ABORT_SPARET_WAIT:
1226 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1227 waitreq->fcol = -1;
1228 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1229 waitreq->next = rf_sparet_wait_queue;
1230 rf_sparet_wait_queue = waitreq;
1231 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1232 wakeup(&rf_sparet_wait_queue);
1233 return (0);
1234
1235 /* used by the spare table daemon to deliver a spare table
1236 * into the kernel */
1237 case RAIDFRAME_SEND_SPARET:
1238
1239 /* install the spare table */
1240 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1241
1242 /* respond to the requestor. the return status of the spare
1243 * table installation is passed in the "fcol" field */
1244 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1245 waitreq->fcol = retcode;
1246 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1247 waitreq->next = rf_sparet_resp_queue;
1248 rf_sparet_resp_queue = waitreq;
1249 wakeup(&rf_sparet_resp_queue);
1250 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1251
1252 return (retcode);
1253 #endif
1254
1255 default:
1256 break; /* fall through to the os-specific code below */
1257
1258 }
1259
1260 if (!raidPtrs[unit]->valid)
1261 return (EINVAL);
1262
1263 /*
1264 * Add support for "regular" device ioctls here.
1265 */
1266
1267 switch (cmd) {
1268 case DIOCGDINFO:
1269 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1270 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1271 break;
1272
1273 case DIOCGPART:
1274 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1275 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1276 ((struct partinfo *) data)->part =
1277 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1278 break;
1279
1280 case DIOCWDINFO:
1281 db1_printf(("DIOCWDINFO\n"));
1282 case DIOCSDINFO:
1283 db1_printf(("DIOCSDINFO\n"));
1284 if ((error = raidlock(rs)) != 0)
1285 return (error);
1286
1287 rs->sc_flags |= RAIDF_LABELLING;
1288
1289 error = setdisklabel(rs->sc_dkdev.dk_label,
1290 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1291 if (error == 0) {
1292 if (cmd == DIOCWDINFO)
1293 error = writedisklabel(RAIDLABELDEV(dev),
1294 raidstrategy, rs->sc_dkdev.dk_label,
1295 rs->sc_dkdev.dk_cpulabel);
1296 }
1297 rs->sc_flags &= ~RAIDF_LABELLING;
1298
1299 raidunlock(rs);
1300
1301 if (error)
1302 return (error);
1303 break;
1304
1305 case DIOCWLABEL:
1306 db1_printf(("DIOCWLABEL\n"));
1307 if (*(int *) data != 0)
1308 rs->sc_flags |= RAIDF_WLABEL;
1309 else
1310 rs->sc_flags &= ~RAIDF_WLABEL;
1311 break;
1312
1313 case DIOCGDEFLABEL:
1314 db1_printf(("DIOCGDEFLABEL\n"));
1315 raidgetdefaultlabel(raidPtrs[unit], rs,
1316 (struct disklabel *) data);
1317 break;
1318
1319 default:
1320 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1321 }
1322 return (retcode);
1323
1324 }
1325
1326
1327 /* raidinit -- complete the rest of the initialization for the
1328 RAIDframe device. */
1329
1330
1331 static int
1332 raidinit(dev, raidPtr, unit)
1333 dev_t dev;
1334 RF_Raid_t *raidPtr;
1335 int unit;
1336 {
1337 int retcode;
1338 /* int ix; */
1339 /* struct raidbuf *raidbp; */
1340 struct raid_softc *rs;
1341
1342 retcode = 0;
1343
1344 rs = &raid_softc[unit];
1345 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1346 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1347
1348
1349 /* XXX should check return code first... */
1350 rs->sc_flags |= RAIDF_INITED;
1351
1352 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1353
1354 rs->sc_dkdev.dk_name = rs->sc_xname;
1355
1356 /* disk_attach actually creates space for the CPU disklabel, among
1357 * other things, so it's critical to call this *BEFORE* we try putzing
1358 * with disklabels. */
1359
1360 disk_attach(&rs->sc_dkdev);
1361
1362 /* XXX There may be a weird interaction here between this, and
1363 * protectedSectors, as used in RAIDframe. */
1364
1365 rs->sc_size = raidPtr->totalSectors;
1366 rs->sc_dev = dev;
1367
1368 return (retcode);
1369 }
1370
1371 /* wake up the daemon & tell it to get us a spare table
1372 * XXX
1373 * the entries in the queues should be tagged with the raidPtr
1374 * so that in the extremely rare case that two recons happen at once,
1375 * we know for which device were requesting a spare table
1376 * XXX
1377 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	/* Hand the request to the sparetable daemon (which is parked in
	 * the RAIDFRAME_SPARET_WAIT ioctl) and wake it up. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* NOTE(review): an older comment here claimed "mpsleep unlocks the
	 * mutex", but this code uses tsleep(), which does NOT release
	 * rf_sparet_wait_mutex across the sleep -- confirm that holding it
	 * here is safe (or that RF_LOCK_MUTEX is effectively a no-op on
	 * this platform). */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* Pop the daemon's response off the response queue. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* the daemon reports the install status in the fcol field */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1403 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1404 * bp & passes it down.
1405 * any calls originating in the kernel must use non-blocking I/O
1406 * do some extra sanity checking to return "appropriate" error values for
1407 * certain conditions (to make some standard utilities work)
1408 *
1409 * Formerly known as: rf_DoAccessKernel
1410 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	struct buf *dp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Check to see if we're at the limit... */
	RF_LOCK_MUTEX(raidPtr->mutex);
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		dp = &rs->buf_queue;
		bp = dp->b_actf;
		if (bp == NULL) {
			/* nothing more to do (mutex already dropped above) */
			return;
		}

		/* update structures: unlink bp from the b_actf/b_actb
		 * doubly-linked queue, fixing up the list tail pointer
		 * when bp was the last element */
		dp = bp->b_actf;
		if (dp != NULL) {
			dp->b_actb = bp->b_actb;
		} else {
			rs->buf_queue.b_actb = bp->b_actb;
		}
		*bp->b_actb = dp;

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {	/* XXX "1 ||" makes this unconditional */
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/* reject requests past the end of the array; the extra
		 * "sum < x" comparisons also catch arithmetic wrap-around */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that are not a whole number of sectors */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));

		/* consume one opening for this I/O */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
		    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
		    do_async, raid_addr, num_blocks,
		    bp->b_un.b_addr, bp, NULL, NULL,
		    RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);

		/* NOTE(review): retcode is not examined here -- presumably
		 * rf_DoAccess() failures are reported through the bp
		 * completion path; confirm. */

		/* re-take the mutex for the loop-condition check */
		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1531
1532
1533
1534
1535 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1536
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;
	int s;

	s=0;
	/* s = splbio();*/ /* want to test this */
	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	/* remember the queue so KernelWakeupFunc() can find it again */
	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* wrap the caller's buf in a raidbuf from the per-unit pool */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));	/* note: extra parens are harmless */
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* no real I/O to issue -- complete immediately */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* set up raidbp->rf_buf for the component read/write;
		 * KernelWakeupFunc() fires from biodone() on completion */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* account for the pending write on the component vnode */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	/* splx(s); */ /* want to test this */
	return (0);
}
1655 /* this is the callback function associated with a I/O invoked from
1656 kernel code.
1657 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate any error on the component I/O to the original bp */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	if (req->tracerec) {
		/* charge the elapsed I/O time to the trace record */
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* return the raidbuf wrapper to the per-unit pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	/* NOTE(review): disk_unbusy() only runs when the transfer fully
	 * completed (b_resid == 0); on an error or partial transfer the
	 * disk_busy() done in rf_DispatchKernelIO() appears to be left
	 * unbalanced -- confirm. */
	if (bp->b_resid == 0) {
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	}

	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);

	splx(s);
}
1738
1739
1740
1741 /*
1742 * initialize a buf structure for doing an I/O in the kernel.
1743 */
1744 static void
1745 InitBP(
1746 struct buf * bp,
1747 struct vnode * b_vp,
1748 unsigned rw_flag,
1749 dev_t dev,
1750 RF_SectorNum_t startSect,
1751 RF_SectorCount_t numSect,
1752 caddr_t buf,
1753 void (*cbFunc) (struct buf *),
1754 void *cbArg,
1755 int logBytesPerSector,
1756 struct proc * b_proc)
1757 {
1758 /* bp->b_flags = B_PHYS | rw_flag; */
1759 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1760 bp->b_bcount = numSect << logBytesPerSector;
1761 bp->b_bufsize = bp->b_bcount;
1762 bp->b_error = 0;
1763 bp->b_dev = dev;
1764 bp->b_un.b_addr = buf;
1765 bp->b_blkno = startSect;
1766 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1767 if (bp->b_bcount == 0) {
1768 panic("bp->b_bcount is zero in InitBP!!\n");
1769 }
1770 bp->b_proc = b_proc;
1771 bp->b_iodone = cbFunc;
1772 bp->b_vp = b_vp;
1773
1774 }
1775
1776 static void
1777 raidgetdefaultlabel(raidPtr, rs, lp)
1778 RF_Raid_t *raidPtr;
1779 struct raid_softc *rs;
1780 struct disklabel *lp;
1781 {
1782 db1_printf(("Building a default label...\n"));
1783 bzero(lp, sizeof(*lp));
1784
1785 /* fabricate a label... */
1786 lp->d_secperunit = raidPtr->totalSectors;
1787 lp->d_secsize = raidPtr->bytesPerSector;
1788 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1789 lp->d_ntracks = 1;
1790 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1791 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1792
1793 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1794 lp->d_type = DTYPE_RAID;
1795 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1796 lp->d_rpm = 3600;
1797 lp->d_interleave = 1;
1798 lp->d_flags = 0;
1799
1800 lp->d_partitions[RAW_PART].p_offset = 0;
1801 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1802 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1803 lp->d_npartitions = RAW_PART + 1;
1804
1805 lp->d_magic = DISKMAGIC;
1806 lp->d_magic2 = DISKMAGIC;
1807 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1808
1809 }
1810 /*
1811 * Read the disklabel from the raid device. If one is not present, fake one
1812 * up.
1813 */
1814 static void
1815 raidgetdisklabel(dev)
1816 dev_t dev;
1817 {
1818 int unit = raidunit(dev);
1819 struct raid_softc *rs = &raid_softc[unit];
1820 char *errstring;
1821 struct disklabel *lp = rs->sc_dkdev.dk_label;
1822 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1823 RF_Raid_t *raidPtr;
1824
1825 db1_printf(("Getting the disklabel...\n"));
1826
1827 bzero(clp, sizeof(*clp));
1828
1829 raidPtr = raidPtrs[unit];
1830
1831 raidgetdefaultlabel(raidPtr, rs, lp);
1832
1833 /*
1834 * Call the generic disklabel extraction routine.
1835 */
1836 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1837 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1838 if (errstring)
1839 raidmakedisklabel(rs);
1840 else {
1841 int i;
1842 struct partition *pp;
1843
1844 /*
1845 * Sanity check whether the found disklabel is valid.
1846 *
1847 * This is necessary since total size of the raid device
1848 * may vary when an interleave is changed even though exactly
1849 * same componets are used, and old disklabel may used
1850 * if that is found.
1851 */
1852 if (lp->d_secperunit != rs->sc_size)
1853 printf("WARNING: %s: "
1854 "total sector size in disklabel (%d) != "
1855 "the size of raid (%ld)\n", rs->sc_xname,
1856 lp->d_secperunit, (long) rs->sc_size);
1857 for (i = 0; i < lp->d_npartitions; i++) {
1858 pp = &lp->d_partitions[i];
1859 if (pp->p_offset + pp->p_size > rs->sc_size)
1860 printf("WARNING: %s: end of partition `%c' "
1861 "exceeds the size of raid (%ld)\n",
1862 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1863 }
1864 }
1865
1866 }
1867 /*
1868 * Take care of things one might want to take care of in the event
1869 * that a disklabel isn't present.
1870 */
1871 static void
1872 raidmakedisklabel(rs)
1873 struct raid_softc *rs;
1874 {
1875 struct disklabel *lp = rs->sc_dkdev.dk_label;
1876 db1_printf(("Making a label..\n"));
1877
1878 /*
1879 * For historical reasons, if there's no disklabel present
1880 * the raw partition must be marked FS_BSDFFS.
1881 */
1882
1883 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1884
1885 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1886
1887 lp->d_checksum = dkcksum(lp);
1888 }
1889 /*
1890 * Lookup the provided name in the filesystem. If the file exists,
1891 * is a valid block device, and isn't being used by anyone else,
1892 * set *vpp to the file's vnode.
1893 * You'll find the original of this in ccd.c
1894 */
1895 int
1896 raidlookup(path, p, vpp)
1897 char *path;
1898 struct proc *p;
1899 struct vnode **vpp; /* result */
1900 {
1901 struct nameidata nd;
1902 struct vnode *vp;
1903 struct vattr va;
1904 int error;
1905
1906 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1907 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1908 #ifdef DEBUG
1909 printf("RAIDframe: vn_open returned %d\n", error);
1910 #endif
1911 return (error);
1912 }
1913 vp = nd.ni_vp;
1914 if (vp->v_usecount > 1) {
1915 VOP_UNLOCK(vp, 0);
1916 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1917 return (EBUSY);
1918 }
1919 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1920 VOP_UNLOCK(vp, 0);
1921 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1922 return (error);
1923 }
1924 /* XXX: eventually we should handle VREG, too. */
1925 if (va.va_type != VBLK) {
1926 VOP_UNLOCK(vp, 0);
1927 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1928 return (ENOTBLK);
1929 }
1930 VOP_UNLOCK(vp, 0);
1931 *vpp = vp;
1932 return (0);
1933 }
1934 /*
1935 * Wait interruptibly for an exclusive lock.
1936 *
1937 * XXX
1938 * Several drivers do this; it should be abstracted and made MP-safe.
1939 * (Hmm... where have we seen this warning before :-> GO )
1940 */
1941 static int
1942 raidlock(rs)
1943 struct raid_softc *rs;
1944 {
1945 int error;
1946
1947 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1948 rs->sc_flags |= RAIDF_WANTED;
1949 if ((error =
1950 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1951 return (error);
1952 }
1953 rs->sc_flags |= RAIDF_LOCKED;
1954 return (0);
1955 }
1956 /*
1957 * Unlock and wake up any waiters.
1958 */
1959 static void
1960 raidunlock(rs)
1961 struct raid_softc *rs;
1962 {
1963
1964 rs->sc_flags &= ~RAIDF_LOCKED;
1965 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
1966 rs->sc_flags &= ~RAIDF_WANTED;
1967 wakeup(rs);
1968 }
1969 }
1970
1971
/* On-disk location and size of the RAIDframe component label on each
 * component -- presumably within the reserved/protected region at the
 * start of the component (see the rf_protected_sectors note in
 * raidread_component_label()). */
#define RF_COMPONENT_INFO_OFFSET   16384   /* bytes */
#define RF_COMPONENT_INFO_SIZE     1024    /* bytes */
1974
1975 int
1976 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
1977 {
1978 RF_ComponentLabel_t component_label;
1979 raidread_component_label(dev, b_vp, &component_label);
1980 component_label.mod_counter = mod_counter;
1981 component_label.clean = RF_RAID_CLEAN;
1982 raidwrite_component_label(dev, b_vp, &component_label);
1983 return(0);
1984 }
1985
1986
1987 int
1988 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
1989 {
1990 RF_ComponentLabel_t component_label;
1991 raidread_component_label(dev, b_vp, &component_label);
1992 component_label.mod_counter = mod_counter;
1993 component_label.clean = RF_RAID_DIRTY;
1994 raidwrite_component_label(dev, b_vp, &component_label);
1995 return(0);
1996 }
1997
1998 /* ARGSUSED */
/*
 * Synchronously read the component label from (dev, b_vp) into
 * *component_label.  Returns 0 on success or the biowait() error.
 */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_resid is conventionally in bytes; dividing by
	 * DEV_BSIZE here gives DEV_BSIZE blocks instead -- confirm what
	 * the strategy routine expects. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* submit directly through the component's block-device strategy */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* invalidate the buffer so the raw label is not re-used as data */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
2048 /* ARGSUSED */
/*
 * Synchronously write *component_label to the component-label area of
 * (dev, b_vp).  The remainder of the label block is zero-filled.
 * Returns 0 on success or the biowait() error.
 */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	/* NOTE(review): b_resid is conventionally in bytes; this value is
	 * in DEV_BSIZE blocks -- confirm what the strategy routine
	 * expects. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero-fill so the pad beyond the label struct is deterministic */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* submit directly through the component's block-device strategy */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* invalidate the buffer -- its contents must not be cached */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2082
/*
 * Bump the array's mod_counter and mark the on-disk component label of
 * every non-failed, non-spared component dirty (e.g. so an unclean
 * shutdown can be detected later).
 */
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	/* new label epoch */
	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			/* skip failed components entirely */
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
	/* NOTE(review): the disabled block below references variables
	 * (sparecol, i, j, srow, scol) that are not declared in this
	 * function, and one raidmarkclean() call is missing its
	 * mod_counter argument -- it will not compile if simply
	 * re-enabled. */
#if 0
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
2177
2178
/*
 * Re-write the component labels across the array after a state change:
 * bump mod_counter, stamp every optimal component's label as optimal,
 * and bring any in-use spares' labels up to date (recording the row and
 * column of the failed disk each spare replaced).  Components are only
 * marked clean when the array's parity is known good.
 *
 * NOTE(review): the spare-handling loop hard-codes row 0
 * (Disks[0][sparecol]) -- presumably spares live on row 0 only; confirm
 * against the spare-allocation code.
 */
void
rf_update_component_labels( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	/* -1 flags "replaced position not found" if the search below fails */
	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit...  */

	raidPtr->mod_counter++;

	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status = rf_ds_optimal;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
						      raidPtr->Disks[r][c].dev,
						      raidPtr->raid_cinfo[r][c].ci_vp,
						      raidPtr->mod_counter);
				}
			}
			/* else we don't touch it.. */
#if 0
			else if (raidPtr->Disks[r][c].status !=
				 rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
						      raidPtr->Disks[r][c].dev,
						      raidPtr->raid_cinfo[r][c].ci_vp,
						      raidPtr->mod_counter);
				}
			}
#endif
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the (row,col) of the disk this spare
			   replaced, so the label can record it */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			if (raidPtr->parity_good == RF_RAID_CLEAN) {
				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
					       raidPtr->raid_cinfo[0][sparecol].ci_vp,
					       raidPtr->mod_counter);
			}
		}
	}
	/* printf("Component labels updated\n"); */
}
2295
2296 void
2297 rf_ReconThread(req)
2298 struct rf_recon_req *req;
2299 {
2300 int s;
2301 RF_Raid_t *raidPtr;
2302
2303 s = splbio();
2304 raidPtr = (RF_Raid_t *) req->raidPtr;
2305 raidPtr->recon_in_progress = 1;
2306
2307 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2308 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2309
2310 /* XXX get rid of this! we don't need it at all.. */
2311 RF_Free(req, sizeof(*req));
2312
2313 raidPtr->recon_in_progress = 0;
2314 splx(s);
2315
2316 /* That's all... */
2317 kthread_exit(0); /* does not return */
2318 }
2319
2320 void
2321 rf_RewriteParityThread(raidPtr)
2322 RF_Raid_t *raidPtr;
2323 {
2324 int retcode;
2325 int s;
2326
2327 raidPtr->parity_rewrite_in_progress = 1;
2328 s = splbio();
2329 retcode = rf_RewriteParity(raidPtr);
2330 splx(s);
2331 if (retcode) {
2332 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2333 } else {
2334 /* set the clean bit! If we shutdown correctly,
2335 the clean bit on each component label will get
2336 set */
2337 raidPtr->parity_good = RF_RAID_CLEAN;
2338 }
2339 raidPtr->parity_rewrite_in_progress = 0;
2340
2341 /* That's all... */
2342 kthread_exit(0); /* does not return */
2343 }
2344
2345
2346 void
2347 rf_CopybackThread(raidPtr)
2348 RF_Raid_t *raidPtr;
2349 {
2350 int s;
2351
2352 raidPtr->copyback_in_progress = 1;
2353 s = splbio();
2354 rf_CopybackReconstructedData(raidPtr);
2355 splx(s);
2356 raidPtr->copyback_in_progress = 0;
2357
2358 /* That's all... */
2359 kthread_exit(0); /* does not return */
2360 }
2361
2362
2363 void
2364 rf_ReconstructInPlaceThread(req)
2365 struct rf_recon_req *req;
2366 {
2367 int retcode;
2368 int s;
2369 RF_Raid_t *raidPtr;
2370
2371 s = splbio();
2372 raidPtr = req->raidPtr;
2373 raidPtr->recon_in_progress = 1;
2374 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2375 RF_Free(req, sizeof(*req));
2376 raidPtr->recon_in_progress = 0;
2377 splx(s);
2378
2379 /* That's all... */
2380 kthread_exit(0); /* does not return */
2381 }
2382