rf_netbsdkintf.c revision 1.38 1 /* $NetBSD: rf_netbsdkintf.c,v 1.38 2000/01/05 04:15:30 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
154 int rf_kdebug_level = 0;
155
156 #ifdef DEBUG
157 #define db0_printf(a) printf a
158 #define db_printf(a) if (rf_kdebug_level > 0) printf a
159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
160 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
161 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
162 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
163 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
164 #else /* DEBUG */
165 #define db0_printf(a) printf a
166 #define db1_printf(a) { }
167 #define db2_printf(a) { }
168 #define db3_printf(a) { }
169 #define db4_printf(a) { }
170 #define db5_printf(a) { }
171 #endif /* DEBUG */
172
173 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
174
175 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
176
177 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
178 * spare table */
179 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
180 * installation process */
181
182 /* prototypes */
183 static void KernelWakeupFunc(struct buf * bp);
184 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
185 dev_t dev, RF_SectorNum_t startSect,
186 RF_SectorCount_t numSect, caddr_t buf,
187 void (*cbFunc) (struct buf *), void *cbArg,
188 int logBytesPerSector, struct proc * b_proc);
189
190 #define Dprintf0(s) if (rf_queueDebug) \
191 rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
192 #define Dprintf1(s,a) if (rf_queueDebug) \
193 rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
194 #define Dprintf2(s,a,b) if (rf_queueDebug) \
195 rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
196 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
197 rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
198
199 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
200 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
201
202 void raidattach __P((int));
203 int raidsize __P((dev_t));
204
205 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
206 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
207 static int raidinit __P((dev_t, RF_Raid_t *, int));
208
209 int raidopen __P((dev_t, int, int, struct proc *));
210 int raidclose __P((dev_t, int, int, struct proc *));
211 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
212 int raidwrite __P((dev_t, struct uio *, int));
213 int raidread __P((dev_t, struct uio *, int));
214 void raidstrategy __P((struct buf *));
215 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
216
217 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
218 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
219 void rf_update_component_labels( RF_Raid_t *);
220 /*
221 * Pilfered from ccd.c
222 */
223
/*
 * Per-component I/O wrapper (pattern borrowed from ccd.c): one of these
 * is allocated (from sc_cbufpool) for each low-level component I/O issued
 * on behalf of an original buf handed to raidstrategy().
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! --
				 * allows casting a struct buf * returned
				 * by the component's completion back to a
				 * struct raidbuf * */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int rf_flags;		/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
230
231
232 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
233 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
234
235 /* XXX Not sure if the following should be replacing the raidPtrs above,
236 or if it should be used in conjunction with that... */
237
/*
 * Per-unit software state for a RAID pseudo-device.  One entry per unit
 * is allocated in the raid_softc[] array by raidattach(); indexed by the
 * unit number derived from the device minor (see raidunit()).
 */
struct raid_softc {
	int sc_flags;		/* flags (RAIDF_* below) */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	dev_t sc_dev;		/* our device.. */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;/* component buffer (struct raidbuf) pool */
	struct buf buf_queue;	/* used for the device queue -- dummy head
				 * of the b_actf/b_actb pending-I/O list */
};
248 /* sc_flags */
249 #define RAIDF_INITED 0x01 /* unit has been initialized */
250 #define RAIDF_WLABEL 0x02 /* label area is writable */
251 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
252 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
253 #define RAIDF_LOCKED 0x80 /* unit is locked */
254
255 #define raidunit(x) DISKUNIT(x)
256 static int numraid = 0;
257
258 /*
259 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
260 * Be aware that large numbers can allow the driver to consume a lot of
261 * kernel memory, especially on writes, and in degraded mode reads.
262 *
263 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
264 * a single 64K write will typically require 64K for the old data,
265 * 64K for the old parity, and 64K for the new parity, for a total
266 * of 192K (if the parity buffer is not re-used immediately).
267 * Even it if is used immedately, that's still 128K, which when multiplied
268 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
269 *
270 * Now in degraded mode, for example, a 64K read on the above setup may
271 * require data reconstruction, which will require *all* of the 4 remaining
272 * disks to participate -- 4 * 32K/disk == 128K again.
273 */
274
275 #ifndef RAIDOUTSTANDING
276 #define RAIDOUTSTANDING 6
277 #endif
278
279 #define RAIDLABELDEV(dev) \
280 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
281
282 /* declared here, and made public, for the benefit of KVM stuff.. */
283 struct raid_softc *raid_softc;
284
285 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
286 struct disklabel *));
287 static void raidgetdisklabel __P((dev_t));
288 static void raidmakedisklabel __P((struct raid_softc *));
289
290 static int raidlock __P((struct raid_softc *));
291 static void raidunlock __P((struct raid_softc *));
292 int raidlookup __P((char *, struct proc * p, struct vnode **));
293
294 static void rf_markalldirty __P((RF_Raid_t *));
295
296 void rf_ReconThread __P((struct rf_recon_req *));
297 /* XXX what I want is: */
298 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
299 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
300 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
301 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
302
303 void
304 raidattach(num)
305 int num;
306 {
307 int raidID;
308 int i, rc;
309
310 #ifdef DEBUG
311 printf("raidattach: Asked for %d units\n", num);
312 #endif
313
314 if (num <= 0) {
315 #ifdef DIAGNOSTIC
316 panic("raidattach: count <= 0");
317 #endif
318 return;
319 }
320 /* This is where all the initialization stuff gets done. */
321
322 /* Make some space for requested number of units... */
323
324 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
325 if (raidPtrs == NULL) {
326 panic("raidPtrs is NULL!!\n");
327 }
328
329 rc = rf_mutex_init(&rf_sparet_wait_mutex);
330 if (rc) {
331 RF_PANIC();
332 }
333
334 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
335
336 for (i = 0; i < numraid; i++)
337 raidPtrs[i] = NULL;
338 rc = rf_BootRaidframe();
339 if (rc == 0)
340 printf("Kernelized RAIDframe activated\n");
341 else
342 panic("Serious error booting RAID!!\n");
343
344 /* put together some datastructures like the CCD device does.. This
345 * lets us lock the device and what-not when it gets opened. */
346
347 raid_softc = (struct raid_softc *)
348 malloc(num * sizeof(struct raid_softc),
349 M_RAIDFRAME, M_NOWAIT);
350 if (raid_softc == NULL) {
351 printf("WARNING: no memory for RAIDframe driver\n");
352 return;
353 }
354 numraid = num;
355 bzero(raid_softc, num * sizeof(struct raid_softc));
356
357 for (raidID = 0; raidID < num; raidID++) {
358 raid_softc[raidID].buf_queue.b_actf = NULL;
359 raid_softc[raidID].buf_queue.b_actb =
360 &raid_softc[raidID].buf_queue.b_actf;
361 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
362 (RF_Raid_t *));
363 if (raidPtrs[raidID] == NULL) {
364 printf("raidPtrs[%d] is NULL\n", raidID);
365 }
366 }
367 }
368
369
370 int
371 raidsize(dev)
372 dev_t dev;
373 {
374 struct raid_softc *rs;
375 struct disklabel *lp;
376 int part, unit, omask, size;
377
378 unit = raidunit(dev);
379 if (unit >= numraid)
380 return (-1);
381 rs = &raid_softc[unit];
382
383 if ((rs->sc_flags & RAIDF_INITED) == 0)
384 return (-1);
385
386 part = DISKPART(dev);
387 omask = rs->sc_dkdev.dk_openmask & (1 << part);
388 lp = rs->sc_dkdev.dk_label;
389
390 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
391 return (-1);
392
393 if (lp->d_partitions[part].p_fstype != FS_SWAP)
394 size = -1;
395 else
396 size = lp->d_partitions[part].p_size *
397 (lp->d_secsize / DEV_BSIZE);
398
399 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
400 return (-1);
401
402 return (size);
403
404 }
405
/*
 * raiddump -- crash-dump entry point.
 *
 * Dumping the kernel image to a RAID set is not supported; always
 * fails with ENXIO.
 */
int
raiddump(dev, blkno, va, size)
	dev_t dev;
	daddr_t blkno;
	caddr_t va;
	size_t size;
{
	/* Not implemented. */
	return ENXIO;
}
/* ARGSUSED */
/*
 * raidopen -- open entry point for both block and character devices.
 *
 * Takes the per-unit lock, (re)reads the disklabel on first open of a
 * configured unit, validates the requested partition, records the open
 * in the appropriate openmask, and marks all components dirty on the
 * very first open of a configured set.  Returns 0 or an errno.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* serialize against concurrent open/close/unconfigure */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
	    unit, part));


	/* first open of a configured unit: refresh the in-core label */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	/* combined openmask must be updated after the per-mode masks */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * raidclose -- close entry point.
 *
 * Clears the partition's bit from the appropriate openmask under the
 * per-unit lock; on the last close of a configured unit, writes updated
 * (clean) component labels out to the members.  Always returns 0 once
 * the lock has been obtained.
 */
int
raidclose(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* serialize against concurrent open/close/unconfigure */
	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */
		rf_update_component_labels( raidPtrs[unit] );
	}

	raidunlock(rs);
	return (0);

}
542
543 void
544 raidstrategy(bp)
545 register struct buf *bp;
546 {
547 register int s;
548
549 unsigned int raidID = raidunit(bp->b_dev);
550 RF_Raid_t *raidPtr;
551 struct raid_softc *rs = &raid_softc[raidID];
552 struct disklabel *lp;
553 struct buf *dp;
554 int wlabel;
555
556 if ((rs->sc_flags & RAIDF_INITED) ==0) {
557 bp->b_error = ENXIO;
558 bp->b_flags = B_ERROR;
559 bp->b_resid = bp->b_bcount;
560 biodone(bp);
561 return;
562 }
563 if (raidID >= numraid || !raidPtrs[raidID]) {
564 bp->b_error = ENODEV;
565 bp->b_flags |= B_ERROR;
566 bp->b_resid = bp->b_bcount;
567 biodone(bp);
568 return;
569 }
570 raidPtr = raidPtrs[raidID];
571 if (!raidPtr->valid) {
572 bp->b_error = ENODEV;
573 bp->b_flags |= B_ERROR;
574 bp->b_resid = bp->b_bcount;
575 biodone(bp);
576 return;
577 }
578 if (bp->b_bcount == 0) {
579 db1_printf(("b_bcount is zero..\n"));
580 biodone(bp);
581 return;
582 }
583 lp = rs->sc_dkdev.dk_label;
584
585 /*
586 * Do bounds checking and adjust transfer. If there's an
587 * error, the bounds check will flag that for us.
588 */
589
590 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
591 if (DISKPART(bp->b_dev) != RAW_PART)
592 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
593 db1_printf(("Bounds check failed!!:%d %d\n",
594 (int) bp->b_blkno, (int) wlabel));
595 biodone(bp);
596 return;
597 }
598 s = splbio();
599
600 bp->b_resid = 0;
601
602 /* stuff it onto our queue */
603
604 dp = &rs->buf_queue;
605 bp->b_actf = NULL;
606 bp->b_actb = dp->b_actb;
607 *dp->b_actb = bp;
608 dp->b_actb = &bp->b_actf;
609
610 raidstart(raidPtrs[raidID]);
611
612 splx(s);
613 }
614 /* ARGSUSED */
615 int
616 raidread(dev, uio, flags)
617 dev_t dev;
618 struct uio *uio;
619 int flags;
620 {
621 int unit = raidunit(dev);
622 struct raid_softc *rs;
623 int part;
624
625 if (unit >= numraid)
626 return (ENXIO);
627 rs = &raid_softc[unit];
628
629 if ((rs->sc_flags & RAIDF_INITED) == 0)
630 return (ENXIO);
631 part = DISKPART(dev);
632
633 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
634
635 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
636
637 }
638 /* ARGSUSED */
639 int
640 raidwrite(dev, uio, flags)
641 dev_t dev;
642 struct uio *uio;
643 int flags;
644 {
645 int unit = raidunit(dev);
646 struct raid_softc *rs;
647
648 if (unit >= numraid)
649 return (ENXIO);
650 rs = &raid_softc[unit];
651
652 if ((rs->sc_flags & RAIDF_INITED) == 0)
653 return (ENXIO);
654 db1_printf(("raidwrite\n"));
655 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
656
657 }
658
659 int
660 raidioctl(dev, cmd, data, flag, p)
661 dev_t dev;
662 u_long cmd;
663 caddr_t data;
664 int flag;
665 struct proc *p;
666 {
667 int unit = raidunit(dev);
668 int error = 0;
669 int part, pmask;
670 struct raid_softc *rs;
671 RF_Config_t *k_cfg, *u_cfg;
672 u_char *specific_buf;
673 int retcode = 0;
674 int row;
675 int column;
676 struct rf_recon_req *rrcopy, *rr;
677 RF_ComponentLabel_t *component_label;
678 RF_ComponentLabel_t ci_label;
679 RF_ComponentLabel_t **c_label_ptr;
680 RF_SingleComponent_t *sparePtr,*componentPtr;
681 RF_SingleComponent_t hot_spare;
682 RF_SingleComponent_t component;
683
684 if (unit >= numraid)
685 return (ENXIO);
686 rs = &raid_softc[unit];
687
688 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
689 (int) DISKPART(dev), (int) unit, (int) cmd));
690
691 /* Must be open for writes for these commands... */
692 switch (cmd) {
693 case DIOCSDINFO:
694 case DIOCWDINFO:
695 case DIOCWLABEL:
696 if ((flag & FWRITE) == 0)
697 return (EBADF);
698 }
699
700 /* Must be initialized for these... */
701 switch (cmd) {
702 case DIOCGDINFO:
703 case DIOCSDINFO:
704 case DIOCWDINFO:
705 case DIOCGPART:
706 case DIOCWLABEL:
707 case DIOCGDEFLABEL:
708 case RAIDFRAME_SHUTDOWN:
709 case RAIDFRAME_REWRITEPARITY:
710 case RAIDFRAME_GET_INFO:
711 case RAIDFRAME_RESET_ACCTOTALS:
712 case RAIDFRAME_GET_ACCTOTALS:
713 case RAIDFRAME_KEEP_ACCTOTALS:
714 case RAIDFRAME_GET_SIZE:
715 case RAIDFRAME_FAIL_DISK:
716 case RAIDFRAME_COPYBACK:
717 case RAIDFRAME_CHECK_RECON_STATUS:
718 case RAIDFRAME_GET_COMPONENT_LABEL:
719 case RAIDFRAME_SET_COMPONENT_LABEL:
720 case RAIDFRAME_ADD_HOT_SPARE:
721 case RAIDFRAME_REMOVE_HOT_SPARE:
722 case RAIDFRAME_INIT_LABELS:
723 case RAIDFRAME_REBUILD_IN_PLACE:
724 case RAIDFRAME_CHECK_PARITY:
725 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
726 case RAIDFRAME_CHECK_COPYBACK_STATUS:
727 if ((rs->sc_flags & RAIDF_INITED) == 0)
728 return (ENXIO);
729 }
730
731 switch (cmd) {
732
733
734 /* configure the system */
735 case RAIDFRAME_CONFIGURE:
736
737 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
738 /* copy-in the configuration information */
739 /* data points to a pointer to the configuration structure */
740 u_cfg = *((RF_Config_t **) data);
741 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
742 if (k_cfg == NULL) {
743 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
744 return (ENOMEM);
745 }
746 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
747 sizeof(RF_Config_t));
748 if (retcode) {
749 RF_Free(k_cfg, sizeof(RF_Config_t));
750 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
751 retcode));
752 return (retcode);
753 }
754 /* allocate a buffer for the layout-specific data, and copy it
755 * in */
756 if (k_cfg->layoutSpecificSize) {
757 if (k_cfg->layoutSpecificSize > 10000) {
758 /* sanity check */
759 RF_Free(k_cfg, sizeof(RF_Config_t));
760 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
761 return (EINVAL);
762 }
763 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
764 (u_char *));
765 if (specific_buf == NULL) {
766 RF_Free(k_cfg, sizeof(RF_Config_t));
767 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
768 return (ENOMEM);
769 }
770 retcode = copyin(k_cfg->layoutSpecific,
771 (caddr_t) specific_buf,
772 k_cfg->layoutSpecificSize);
773 if (retcode) {
774 RF_Free(k_cfg, sizeof(RF_Config_t));
775 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
776 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
777 retcode));
778 return (retcode);
779 }
780 } else
781 specific_buf = NULL;
782 k_cfg->layoutSpecific = specific_buf;
783
784 /* should do some kind of sanity check on the configuration.
785 * Store the sum of all the bytes in the last byte? */
786
787 /* configure the system */
788
789 raidPtrs[unit]->raidid = unit;
790
791 retcode = rf_Configure(raidPtrs[unit], k_cfg);
792
793 /* allow this many simultaneous IO's to this RAID device */
794 raidPtrs[unit]->openings = RAIDOUTSTANDING;
795
796 /* XXX should be moved to rf_Configure() */
797
798 raidPtrs[unit]->copyback_in_progress = 0;
799 raidPtrs[unit]->parity_rewrite_in_progress = 0;
800 raidPtrs[unit]->recon_in_progress = 0;
801
802 if (retcode == 0) {
803 retcode = raidinit(dev, raidPtrs[unit], unit);
804 rf_markalldirty( raidPtrs[unit] );
805 }
806 /* free the buffers. No return code here. */
807 if (k_cfg->layoutSpecificSize) {
808 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
809 }
810 RF_Free(k_cfg, sizeof(RF_Config_t));
811
812 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
813 retcode));
814
815 return (retcode);
816
817 /* shutdown the system */
818 case RAIDFRAME_SHUTDOWN:
819
820 if ((error = raidlock(rs)) != 0)
821 return (error);
822
823 /*
824 * If somebody has a partition mounted, we shouldn't
825 * shutdown.
826 */
827
828 part = DISKPART(dev);
829 pmask = (1 << part);
830 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
831 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
832 (rs->sc_dkdev.dk_copenmask & pmask))) {
833 raidunlock(rs);
834 return (EBUSY);
835 }
836
837 if (rf_debugKernelAccess) {
838 printf("call shutdown\n");
839 }
840
841 retcode = rf_Shutdown(raidPtrs[unit]);
842
843 db1_printf(("Done main shutdown\n"));
844
845 pool_destroy(&rs->sc_cbufpool);
846 db1_printf(("Done freeing component buffer freelist\n"));
847
848 /* It's no longer initialized... */
849 rs->sc_flags &= ~RAIDF_INITED;
850
851 /* Detach the disk. */
852 disk_detach(&rs->sc_dkdev);
853
854 raidunlock(rs);
855
856 return (retcode);
857 case RAIDFRAME_GET_COMPONENT_LABEL:
858 c_label_ptr = (RF_ComponentLabel_t **) data;
859 /* need to read the component label for the disk indicated
860 by row,column in component_label
861 XXX need to sanity check these values!!!
862 */
863
864 /* For practice, let's get it directly fromdisk, rather
865 than from the in-core copy */
866 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
867 (RF_ComponentLabel_t *));
868 if (component_label == NULL)
869 return (ENOMEM);
870
871 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
872
873 retcode = copyin( *c_label_ptr, component_label,
874 sizeof(RF_ComponentLabel_t));
875
876 if (retcode) {
877 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
878 return(retcode);
879 }
880
881 row = component_label->row;
882 column = component_label->column;
883
884 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
885 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
886 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
887 return(EINVAL);
888 }
889
890 raidread_component_label(
891 raidPtrs[unit]->Disks[row][column].dev,
892 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
893 component_label );
894
895 retcode = copyout((caddr_t) component_label,
896 (caddr_t) *c_label_ptr,
897 sizeof(RF_ComponentLabel_t));
898 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
899 return (retcode);
900
901 case RAIDFRAME_SET_COMPONENT_LABEL:
902 component_label = (RF_ComponentLabel_t *) data;
903
904 /* XXX check the label for valid stuff... */
905 /* Note that some things *should not* get modified --
906 the user should be re-initing the labels instead of
907 trying to patch things.
908 */
909
910 printf("Got component label:\n");
911 printf("Version: %d\n",component_label->version);
912 printf("Serial Number: %d\n",component_label->serial_number);
913 printf("Mod counter: %d\n",component_label->mod_counter);
914 printf("Row: %d\n", component_label->row);
915 printf("Column: %d\n", component_label->column);
916 printf("Num Rows: %d\n", component_label->num_rows);
917 printf("Num Columns: %d\n", component_label->num_columns);
918 printf("Clean: %d\n", component_label->clean);
919 printf("Status: %d\n", component_label->status);
920
921 row = component_label->row;
922 column = component_label->column;
923
924 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
925 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
926 return(EINVAL);
927 }
928
929 /* XXX this isn't allowed to do anything for now :-) */
930 #if 0
931 raidwrite_component_label(
932 raidPtrs[unit]->Disks[row][column].dev,
933 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
934 component_label );
935 #endif
936 return (0);
937
938 case RAIDFRAME_INIT_LABELS:
939 component_label = (RF_ComponentLabel_t *) data;
940 /*
941 we only want the serial number from
942 the above. We get all the rest of the information
943 from the config that was used to create this RAID
944 set.
945 */
946
947 raidPtrs[unit]->serial_number = component_label->serial_number;
948 /* current version number */
949 ci_label.version = RF_COMPONENT_LABEL_VERSION;
950 ci_label.serial_number = component_label->serial_number;
951 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
952 ci_label.num_rows = raidPtrs[unit]->numRow;
953 ci_label.num_columns = raidPtrs[unit]->numCol;
954 ci_label.clean = RF_RAID_DIRTY; /* not clean */
955 ci_label.status = rf_ds_optimal; /* "It's good!" */
956
957 for(row=0;row<raidPtrs[unit]->numRow;row++) {
958 ci_label.row = row;
959 for(column=0;column<raidPtrs[unit]->numCol;column++) {
960 ci_label.column = column;
961 raidwrite_component_label(
962 raidPtrs[unit]->Disks[row][column].dev,
963 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
964 &ci_label );
965 }
966 }
967
968 return (retcode);
969
970 /* initialize all parity */
971 case RAIDFRAME_REWRITEPARITY:
972
973 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
974 /* Parity for RAID 0 is trivially correct */
975 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
976 return(0);
977 }
978
979 if (raidPtrs[unit]->parity_rewrite_in_progress == 1) {
980 /* Re-write is already in progress! */
981 return(EINVAL);
982 }
983
984 /* borrow the thread of the requesting process */
985
986 retcode = RF_CREATE_THREAD(raidPtrs[unit]->parity_rewrite_thread,
987 rf_RewriteParityThread,
988 raidPtrs[unit],"raid_parity");
989 return (retcode);
990
991
992 case RAIDFRAME_ADD_HOT_SPARE:
993 sparePtr = (RF_SingleComponent_t *) data;
994 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
995 printf("Adding spare\n");
996 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
997 return(retcode);
998
999 case RAIDFRAME_REMOVE_HOT_SPARE:
1000 return(retcode);
1001
1002 case RAIDFRAME_REBUILD_IN_PLACE:
1003
1004 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1005 /* Can't do this on a RAID 0!! */
1006 return(EINVAL);
1007 }
1008
1009 if (raidPtrs[unit]->recon_in_progress == 1) {
1010 /* a reconstruct is already in progress! */
1011 return(EINVAL);
1012 }
1013
1014 componentPtr = (RF_SingleComponent_t *) data;
1015 memcpy( &component, componentPtr,
1016 sizeof(RF_SingleComponent_t));
1017 row = component.row;
1018 column = component.column;
1019 printf("Rebuild: %d %d\n",row, column);
1020 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
1021 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
1022 return(EINVAL);
1023 }
1024
1025 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1026 if (rrcopy == NULL)
1027 return(ENOMEM);
1028
1029 rrcopy->raidPtr = (void *) raidPtrs[unit];
1030 rrcopy->row = row;
1031 rrcopy->col = column;
1032
1033 retcode = RF_CREATE_THREAD(raidPtrs[unit]->recon_thread,
1034 rf_ReconstructInPlaceThread,
1035 rrcopy,"raid_reconip");
1036 return(retcode);
1037
1038 case RAIDFRAME_GET_INFO:
1039 {
1040 RF_Raid_t *raid = raidPtrs[unit];
1041 RF_DeviceConfig_t *cfg, **ucfgp;
1042 int i, j, d;
1043
1044 if (!raid->valid)
1045 return (ENODEV);
1046 ucfgp = (RF_DeviceConfig_t **) data;
1047 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1048 (RF_DeviceConfig_t *));
1049 if (cfg == NULL)
1050 return (ENOMEM);
1051 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1052 cfg->rows = raid->numRow;
1053 cfg->cols = raid->numCol;
1054 cfg->ndevs = raid->numRow * raid->numCol;
1055 if (cfg->ndevs >= RF_MAX_DISKS) {
1056 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1057 return (ENOMEM);
1058 }
1059 cfg->nspares = raid->numSpare;
1060 if (cfg->nspares >= RF_MAX_DISKS) {
1061 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1062 return (ENOMEM);
1063 }
1064 cfg->maxqdepth = raid->maxQueueDepth;
1065 d = 0;
1066 for (i = 0; i < cfg->rows; i++) {
1067 for (j = 0; j < cfg->cols; j++) {
1068 cfg->devs[d] = raid->Disks[i][j];
1069 d++;
1070 }
1071 }
1072 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1073 cfg->spares[i] = raid->Disks[0][j];
1074 }
1075 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1076 sizeof(RF_DeviceConfig_t));
1077 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1078
1079 return (retcode);
1080 }
1081 break;
1082 case RAIDFRAME_CHECK_PARITY:
1083 *(int *) data = raidPtrs[unit]->parity_good;
1084 return (0);
1085 case RAIDFRAME_RESET_ACCTOTALS:
1086 {
1087 RF_Raid_t *raid = raidPtrs[unit];
1088
1089 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1090 return (0);
1091 }
1092 break;
1093
1094 case RAIDFRAME_GET_ACCTOTALS:
1095 {
1096 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1097 RF_Raid_t *raid = raidPtrs[unit];
1098
1099 *totals = raid->acc_totals;
1100 return (0);
1101 }
1102 break;
1103
1104 case RAIDFRAME_KEEP_ACCTOTALS:
1105 {
1106 RF_Raid_t *raid = raidPtrs[unit];
1107 int *keep = (int *) data;
1108
1109 raid->keep_acc_totals = *keep;
1110 return (0);
1111 }
1112 break;
1113
1114 case RAIDFRAME_GET_SIZE:
1115 *(int *) data = raidPtrs[unit]->totalSectors;
1116 return (0);
1117
1118 /* fail a disk & optionally start reconstruction */
1119 case RAIDFRAME_FAIL_DISK:
1120
1121 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1122 /* Can't do this on a RAID 0!! */
1123 return(EINVAL);
1124 }
1125
1126 rr = (struct rf_recon_req *) data;
1127
1128 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1129 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1130 return (EINVAL);
1131
1132 printf("raid%d: Failing the disk: row: %d col: %d\n",
1133 unit, rr->row, rr->col);
1134
1135 /* make a copy of the recon request so that we don't rely on
1136 * the user's buffer */
1137 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1138 if (rrcopy == NULL)
1139 return(ENOMEM);
1140 bcopy(rr, rrcopy, sizeof(*rr));
1141 rrcopy->raidPtr = (void *) raidPtrs[unit];
1142
1143 retcode = RF_CREATE_THREAD(raidPtrs[unit]->recon_thread,
1144 rf_ReconThread,
1145 rrcopy,"raid_recon");
1146 return (0);
1147
1148 /* invoke a copyback operation after recon on whatever disk
1149 * needs it, if any */
1150 case RAIDFRAME_COPYBACK:
1151
1152 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1153 /* This makes no sense on a RAID 0!! */
1154 return(EINVAL);
1155 }
1156
1157 if (raidPtrs[unit]->copyback_in_progress == 1) {
1158 /* Copyback is already in progress! */
1159 return(EINVAL);
1160 }
1161
1162 retcode = RF_CREATE_THREAD(raidPtrs[unit]->copyback_thread,
1163 rf_CopybackThread,
1164 raidPtrs[unit],"raid_copyback");
1165 return (retcode);
1166
1167 /* return the percentage completion of reconstruction */
1168 case RAIDFRAME_CHECK_RECON_STATUS:
1169 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1170 /* This makes no sense on a RAID 0 */
1171 return(EINVAL);
1172 }
1173 row = 0; /* XXX we only consider a single row... */
1174 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1175 *(int *) data = 100;
1176 else
1177 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1178 return (0);
1179
1180 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1181 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1182 /* This makes no sense on a RAID 0 */
1183 return(EINVAL);
1184 }
1185 if (raidPtrs[unit]->parity_rewrite_in_progress == 1) {
1186 *(int *) data = 100 * raidPtrs[unit]->parity_rewrite_stripes_done / raidPtrs[unit]->Layout.numStripe;
1187 } else {
1188 *(int *) data = 100;
1189 }
1190 return (0);
1191
1192 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1193 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1194 /* This makes no sense on a RAID 0 */
1195 return(EINVAL);
1196 }
1197 if (raidPtrs[unit]->copyback_in_progress == 1) {
1198 *(int *) data = 100 * raidPtrs[unit]->copyback_stripes_done / raidPtrs[unit]->Layout.numStripe;
1199 } else {
1200 *(int *) data = 100;
1201 }
1202 return (0);
1203
1204
1205 /* the sparetable daemon calls this to wait for the kernel to
1206 * need a spare table. this ioctl does not return until a
1207 * spare table is needed. XXX -- calling mpsleep here in the
1208 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1209 * -- I should either compute the spare table in the kernel,
1210 * or have a different -- XXX XXX -- interface (a different
1211 * character device) for delivering the table -- XXX */
1212 #if 0
1213 case RAIDFRAME_SPARET_WAIT:
1214 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1215 while (!rf_sparet_wait_queue)
1216 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1217 waitreq = rf_sparet_wait_queue;
1218 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1219 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1220
1221 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1222
1223 RF_Free(waitreq, sizeof(*waitreq));
1224 return (0);
1225
1226
1227 /* wakes up a process waiting on SPARET_WAIT and puts an error
 * code in it that will cause the daemon to exit */
1229 case RAIDFRAME_ABORT_SPARET_WAIT:
1230 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1231 waitreq->fcol = -1;
1232 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1233 waitreq->next = rf_sparet_wait_queue;
1234 rf_sparet_wait_queue = waitreq;
1235 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1236 wakeup(&rf_sparet_wait_queue);
1237 return (0);
1238
1239 /* used by the spare table daemon to deliver a spare table
1240 * into the kernel */
1241 case RAIDFRAME_SEND_SPARET:
1242
1243 /* install the spare table */
1244 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1245
1246 /* respond to the requestor. the return status of the spare
1247 * table installation is passed in the "fcol" field */
1248 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1249 waitreq->fcol = retcode;
1250 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1251 waitreq->next = rf_sparet_resp_queue;
1252 rf_sparet_resp_queue = waitreq;
1253 wakeup(&rf_sparet_resp_queue);
1254 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1255
1256 return (retcode);
1257 #endif
1258
1259 default:
1260 break; /* fall through to the os-specific code below */
1261
1262 }
1263
1264 if (!raidPtrs[unit]->valid)
1265 return (EINVAL);
1266
1267 /*
1268 * Add support for "regular" device ioctls here.
1269 */
1270
1271 switch (cmd) {
1272 case DIOCGDINFO:
1273 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1274 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1275 break;
1276
1277 case DIOCGPART:
1278 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1279 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1280 ((struct partinfo *) data)->part =
1281 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1282 break;
1283
1284 case DIOCWDINFO:
1285 db1_printf(("DIOCWDINFO\n"));
1286 case DIOCSDINFO:
1287 db1_printf(("DIOCSDINFO\n"));
1288 if ((error = raidlock(rs)) != 0)
1289 return (error);
1290
1291 rs->sc_flags |= RAIDF_LABELLING;
1292
1293 error = setdisklabel(rs->sc_dkdev.dk_label,
1294 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1295 if (error == 0) {
1296 if (cmd == DIOCWDINFO)
1297 error = writedisklabel(RAIDLABELDEV(dev),
1298 raidstrategy, rs->sc_dkdev.dk_label,
1299 rs->sc_dkdev.dk_cpulabel);
1300 }
1301 rs->sc_flags &= ~RAIDF_LABELLING;
1302
1303 raidunlock(rs);
1304
1305 if (error)
1306 return (error);
1307 break;
1308
1309 case DIOCWLABEL:
1310 db1_printf(("DIOCWLABEL\n"));
1311 if (*(int *) data != 0)
1312 rs->sc_flags |= RAIDF_WLABEL;
1313 else
1314 rs->sc_flags &= ~RAIDF_WLABEL;
1315 break;
1316
1317 case DIOCGDEFLABEL:
1318 db1_printf(("DIOCGDEFLABEL\n"));
1319 raidgetdefaultlabel(raidPtrs[unit], rs,
1320 (struct disklabel *) data);
1321 break;
1322
1323 default:
1324 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1325 }
1326 return (retcode);
1327
1328 }
1329
1330
1331 /* raidinit -- complete the rest of the initialization for the
1332 RAIDframe device. */
1333
1334
1335 static int
1336 raidinit(dev, raidPtr, unit)
1337 dev_t dev;
1338 RF_Raid_t *raidPtr;
1339 int unit;
1340 {
1341 int retcode;
1342 /* int ix; */
1343 /* struct raidbuf *raidbp; */
1344 struct raid_softc *rs;
1345
1346 retcode = 0;
1347
1348 rs = &raid_softc[unit];
1349 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1350 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1351
1352
1353 /* XXX should check return code first... */
1354 rs->sc_flags |= RAIDF_INITED;
1355
1356 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1357
1358 rs->sc_dkdev.dk_name = rs->sc_xname;
1359
1360 /* disk_attach actually creates space for the CPU disklabel, among
1361 * other things, so it's critical to call this *BEFORE* we try putzing
1362 * with disklabels. */
1363
1364 disk_attach(&rs->sc_dkdev);
1365
1366 /* XXX There may be a weird interaction here between this, and
1367 * protectedSectors, as used in RAIDframe. */
1368
1369 rs->sc_size = raidPtr->totalSectors;
1370 rs->sc_dev = dev;
1371
1372 return (retcode);
1373 }
1374
1375 /* wake up the daemon & tell it to get us a spare table
1376 * XXX
1377 * the entries in the queues should be tagged with the raidPtr
1378 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
1380 * XXX
1381 */
/*
 * Queue a spare-table request for the user-level daemon, then sleep until
 * the daemon posts a reply on rf_sparet_resp_queue.  The reply's fcol
 * field carries the daemon's status code, which we return.  The request
 * structure the daemon hands back is freed here; the caller's req is
 * consumed by the wait queue.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int     retcode;

	/* Post the request and wake the daemon blocked in
	 * RAIDFRAME_SPARET_WAIT. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): the mpsleep this comment refers to dropped the mutex
	 * while asleep; tsleep() below does not know about
	 * rf_sparet_wait_mutex, so we appear to sleep with it held --
	 * confirm the RF mutex macros make that safe here. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* Pop the daemon's response off the reply queue. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* Return status is passed in the "fcol" field of the response. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1407 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1408 * bp & passes it down.
1409 * any calls originating in the kernel must use non-blocking I/O
1410 * do some extra sanity checking to return "appropriate" error values for
1411 * certain conditions (to make some standard utilities work)
1412 *
1413 * Formerly known as: rf_DoAccessKernel
1414 */
/*
 * Drain the per-unit buffer queue, dispatching each buf into RAIDframe
 * via rf_DoAccess() as non-blocking I/O, for as long as the array has
 * openings available.  Buffers that fail validation (past end of device,
 * not sector-aligned) are completed immediately with an error.
 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int     retcode;
	struct partition *pp;
	daddr_t blocknum;
	int     unit;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;
	struct buf *dp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Check to see if we're at the limit... */
	RF_LOCK_MUTEX(raidPtr->mutex);
	/* Loop invariant: mutex held at the top of each iteration. */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		dp = &rs->buf_queue;
		bp = dp->b_actf;
		if (bp == NULL) {
			/* nothing more to do */
			return;
		}

		/* update structures */
		/* Unlink bp from the doubly-linked b_actf/b_actb queue
		 * headed at rs->buf_queue. */
		dp = bp->b_actf;
		if (dp != NULL) {
			dp->b_actb = bp->b_actb;
		} else {
			/* bp was the tail; queue head's back-pointer must
			 * now point at bp's predecessor. */
			rs->buf_queue.b_actb = bp->b_actb;
		}
		*bp->b_actb = dp;

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb = 1 iff the byte count has a partial trailing sector. */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/* Reject I/O past the end of the set; the (sum < x)
		 * comparisons also catch arithmetic wrap-around above. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Transfers must be a whole number of sectors. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Consume one opening for this request. */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */


		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				      do_async, raid_addr, num_blocks,
				      bp->b_un.b_addr, bp, NULL, NULL,
				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);


		/* Re-take the mutex for the loop-condition check. */
		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1535
1536
1537
1538
1539 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1540
/*
 * Dispatch one RAIDframe disk-queue request to the underlying component
 * via the buf/vnode layer.  A raidbuf shadow buffer is allocated to carry
 * the request; KernelWakeupFunc() is the biodone callback that completes
 * it.  Disk queue should be locked upon entry.  Always returns 0.
 */
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int     unit;
	int     s;

	s=0;
	/* s = splbio();*/ /* want to test this */
	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	/* Remember the owning queue so the completion handler can find it. */
	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	/* Clear stale error state left in the buffer from a prior use. */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* Get a shadow buffer from the per-unit pool. */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* No physical I/O: complete the shadow buffer directly. */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* Fill in the shadow buffer for the component transfer;
		 * KernelWakeupFunc is installed as b_iodone. */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* Writes must bump the vnode's output counter. */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	/* splx(s); */ /* want to test this */
	return (0);
}
1659 /* this is the callback function associated with a I/O invoked from
1660 kernel code.
1661 */
/*
 * biodone() callback for component I/O issued by rf_DispatchKernelIO().
 * Propagates error/resid state from the shadow raidbuf back to the
 * original buffer, updates trace timing, marks a component failed on its
 * first I/O error, releases the shadow buffer, and hands completion back
 * to RAIDframe via rf_DiskIOComplete() and the request's CompleteFunc.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* The shadow buffer is the first member of struct raidbuf, so the
	 * buf pointer we were handed doubles as the raidbuf pointer. */
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int     unit;
	register int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;

	queue = (RF_DiskQueue_t *) req->queue;

	/* Copy error state from the shadow buffer to the original,
	 * defaulting to EIO when no specific errno was recorded. */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	if (req->tracerec) {
		/* Account the elapsed physical-I/O time for this request. */
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Return the shadow buffer to the per-unit pool. */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		/* NOTE(review): disk_busy() was called in the dispatch path,
		 * but disk_unbusy() only runs when b_resid == 0 -- an I/O
		 * completing with a nonzero residual would seem to leave the
		 * busy count high.  Confirm against the disk(9) pairing
		 * rules. */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	}

	/* Hand completion (with error flag) back to RAIDframe. */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);

	splx(s);
}
1742
1743
1744
1745 /*
1746 * initialize a buf structure for doing an I/O in the kernel.
1747 */
1748 static void
1749 InitBP(
1750 struct buf * bp,
1751 struct vnode * b_vp,
1752 unsigned rw_flag,
1753 dev_t dev,
1754 RF_SectorNum_t startSect,
1755 RF_SectorCount_t numSect,
1756 caddr_t buf,
1757 void (*cbFunc) (struct buf *),
1758 void *cbArg,
1759 int logBytesPerSector,
1760 struct proc * b_proc)
1761 {
1762 /* bp->b_flags = B_PHYS | rw_flag; */
1763 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1764 bp->b_bcount = numSect << logBytesPerSector;
1765 bp->b_bufsize = bp->b_bcount;
1766 bp->b_error = 0;
1767 bp->b_dev = dev;
1768 bp->b_un.b_addr = buf;
1769 bp->b_blkno = startSect;
1770 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1771 if (bp->b_bcount == 0) {
1772 panic("bp->b_bcount is zero in InitBP!!\n");
1773 }
1774 bp->b_proc = b_proc;
1775 bp->b_iodone = cbFunc;
1776 bp->b_vp = b_vp;
1777
1778 }
1779
1780 static void
1781 raidgetdefaultlabel(raidPtr, rs, lp)
1782 RF_Raid_t *raidPtr;
1783 struct raid_softc *rs;
1784 struct disklabel *lp;
1785 {
1786 db1_printf(("Building a default label...\n"));
1787 bzero(lp, sizeof(*lp));
1788
1789 /* fabricate a label... */
1790 lp->d_secperunit = raidPtr->totalSectors;
1791 lp->d_secsize = raidPtr->bytesPerSector;
1792 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1793 lp->d_ntracks = 1;
1794 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1795 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1796
1797 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1798 lp->d_type = DTYPE_RAID;
1799 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1800 lp->d_rpm = 3600;
1801 lp->d_interleave = 1;
1802 lp->d_flags = 0;
1803
1804 lp->d_partitions[RAW_PART].p_offset = 0;
1805 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1806 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1807 lp->d_npartitions = RAW_PART + 1;
1808
1809 lp->d_magic = DISKMAGIC;
1810 lp->d_magic2 = DISKMAGIC;
1811 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1812
1813 }
1814 /*
1815 * Read the disklabel from the raid device. If one is not present, fake one
1816 * up.
1817 */
1818 static void
1819 raidgetdisklabel(dev)
1820 dev_t dev;
1821 {
1822 int unit = raidunit(dev);
1823 struct raid_softc *rs = &raid_softc[unit];
1824 char *errstring;
1825 struct disklabel *lp = rs->sc_dkdev.dk_label;
1826 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1827 RF_Raid_t *raidPtr;
1828
1829 db1_printf(("Getting the disklabel...\n"));
1830
1831 bzero(clp, sizeof(*clp));
1832
1833 raidPtr = raidPtrs[unit];
1834
1835 raidgetdefaultlabel(raidPtr, rs, lp);
1836
1837 /*
1838 * Call the generic disklabel extraction routine.
1839 */
1840 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1841 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1842 if (errstring)
1843 raidmakedisklabel(rs);
1844 else {
1845 int i;
1846 struct partition *pp;
1847
1848 /*
1849 * Sanity check whether the found disklabel is valid.
1850 *
1851 * This is necessary since total size of the raid device
1852 * may vary when an interleave is changed even though exactly
1853 * same componets are used, and old disklabel may used
1854 * if that is found.
1855 */
1856 if (lp->d_secperunit != rs->sc_size)
1857 printf("WARNING: %s: "
1858 "total sector size in disklabel (%d) != "
1859 "the size of raid (%ld)\n", rs->sc_xname,
1860 lp->d_secperunit, (long) rs->sc_size);
1861 for (i = 0; i < lp->d_npartitions; i++) {
1862 pp = &lp->d_partitions[i];
1863 if (pp->p_offset + pp->p_size > rs->sc_size)
1864 printf("WARNING: %s: end of partition `%c' "
1865 "exceeds the size of raid (%ld)\n",
1866 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1867 }
1868 }
1869
1870 }
1871 /*
1872 * Take care of things one might want to take care of in the event
1873 * that a disklabel isn't present.
1874 */
1875 static void
1876 raidmakedisklabel(rs)
1877 struct raid_softc *rs;
1878 {
1879 struct disklabel *lp = rs->sc_dkdev.dk_label;
1880 db1_printf(("Making a label..\n"));
1881
1882 /*
1883 * For historical reasons, if there's no disklabel present
1884 * the raw partition must be marked FS_BSDFFS.
1885 */
1886
1887 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1888
1889 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1890
1891 lp->d_checksum = dkcksum(lp);
1892 }
1893 /*
1894 * Lookup the provided name in the filesystem. If the file exists,
1895 * is a valid block device, and isn't being used by anyone else,
1896 * set *vpp to the file's vnode.
1897 * You'll find the original of this in ccd.c
1898 */
1899 int
1900 raidlookup(path, p, vpp)
1901 char *path;
1902 struct proc *p;
1903 struct vnode **vpp; /* result */
1904 {
1905 struct nameidata nd;
1906 struct vnode *vp;
1907 struct vattr va;
1908 int error;
1909
1910 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1911 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1912 #ifdef DEBUG
1913 printf("RAIDframe: vn_open returned %d\n", error);
1914 #endif
1915 return (error);
1916 }
1917 vp = nd.ni_vp;
1918 if (vp->v_usecount > 1) {
1919 VOP_UNLOCK(vp, 0);
1920 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1921 return (EBUSY);
1922 }
1923 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1924 VOP_UNLOCK(vp, 0);
1925 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1926 return (error);
1927 }
1928 /* XXX: eventually we should handle VREG, too. */
1929 if (va.va_type != VBLK) {
1930 VOP_UNLOCK(vp, 0);
1931 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1932 return (ENOTBLK);
1933 }
1934 VOP_UNLOCK(vp, 0);
1935 *vpp = vp;
1936 return (0);
1937 }
1938 /*
1939 * Wait interruptibly for an exclusive lock.
1940 *
1941 * XXX
1942 * Several drivers do this; it should be abstracted and made MP-safe.
1943 * (Hmm... where have we seen this warning before :-> GO )
1944 */
1945 static int
1946 raidlock(rs)
1947 struct raid_softc *rs;
1948 {
1949 int error;
1950
1951 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1952 rs->sc_flags |= RAIDF_WANTED;
1953 if ((error =
1954 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1955 return (error);
1956 }
1957 rs->sc_flags |= RAIDF_LOCKED;
1958 return (0);
1959 }
1960 /*
1961 * Unlock and wake up any waiters.
1962 */
1963 static void
1964 raidunlock(rs)
1965 struct raid_softc *rs;
1966 {
1967
1968 rs->sc_flags &= ~RAIDF_LOCKED;
1969 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
1970 rs->sc_flags &= ~RAIDF_WANTED;
1971 wakeup(rs);
1972 }
1973 }
1974
1975
1976 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
1977 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
1978
1979 int
1980 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
1981 {
1982 RF_ComponentLabel_t component_label;
1983 raidread_component_label(dev, b_vp, &component_label);
1984 component_label.mod_counter = mod_counter;
1985 component_label.clean = RF_RAID_CLEAN;
1986 raidwrite_component_label(dev, b_vp, &component_label);
1987 return(0);
1988 }
1989
1990
1991 int
1992 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
1993 {
1994 RF_ComponentLabel_t component_label;
1995 raidread_component_label(dev, b_vp, &component_label);
1996 component_label.mod_counter = mod_counter;
1997 component_label.clean = RF_RAID_DIRTY;
1998 raidwrite_component_label(dev, b_vp, &component_label);
1999 return(0);
2000 }
2001
2002 /* ARGSUSED */
/*
 * Read the RAIDframe component label from the given component device
 * into *component_label.  The label lives RF_COMPONENT_INFO_OFFSET
 * bytes into the component.  Returns 0 on success or the biowait()
 * errno on failure (in which case *component_label is not filled in).
 */
/* ARGSUSED */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_resid is normally a byte count, but this sets it
	 * to a sector count -- confirm what the strategy routine expects. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* Issue the read directly through the component's block driver. */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		/* Copy just the label structure out of the sector buffer. */
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* Don't let the scratch buffer be cached. */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
2052 /* ARGSUSED */
/*
 * Write *component_label to the component label area of the given
 * component device (RF_COMPONENT_INFO_OFFSET bytes in).  The rest of
 * the RF_COMPONENT_INFO_SIZE area is zero-filled.  Returns 0 on
 * success or the biowait() errno on failure.
 */
/* ARGSUSED */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	/* NOTE(review): b_resid is normally a byte count, but this sets it
	 * to a sector count -- confirm what the strategy routine expects. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* Zero the whole label area, then drop the label at its start. */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* Issue the write directly through the component's block driver. */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* Don't let the scratch buffer be cached. */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2086
/*
 * Bump the array's modification counter and mark the component label of
 * every non-failed component as dirty.  Spared components are skipped
 * (see the XXX below).  The large #if 0 region is unfinished handling
 * for spare disks and is not compiled.
 */
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				/* Peek at the on-disk label to find out
				 * whether this slot has been spared. */
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				/* Set the dirty bit with the new
				 * modification counter. */
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
#if 0
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
2181
2182
/*
 * rf_update_component_labels:
 *	Refresh the on-disk component labels for all optimal components
 *	and all in-use spares: bump the mod counter, rewrite each label
 *	with current status, and — if parity is known good — mark the
 *	component clean.  Typically called at shutdown/close time.
 */
void
rf_update_component_labels( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	/* -1 means "no mapping found" for the spare's home position. */
	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				/* NOTE(review): read errors are ignored; a
				   failed read means we rewrite a label built
				   from stale data. */
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status = rf_ds_optimal;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
						      raidPtr->Disks[r][c].dev,
						      raidPtr->raid_cinfo[r][c].ci_vp,
						      raidPtr->mod_counter);
				}
			}
			/* else we don't touch it.. */
#if 0
			/* Disabled: would also refresh labels on components
			   that are neither optimal nor failed. */
			else if (raidPtr->Disks[r][c].status !=
				 rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
						      raidPtr->Disks[r][c].dev,
						      raidPtr->raid_cinfo[r][c].ci_vp,
						      raidPtr->mod_counter);
				}
			}
#endif
		}
	}

	/* Spares live past numCol in row 0.  NOTE(review): row 0 is
	   hard-coded throughout this loop — presumably spares are only
	   tracked in row 0; confirm against the spare-allocation code. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the array position this spare stands in for.
			   NOTE(review): the break only exits the inner loop;
			   the outer loop keeps scanning after a match —
			   harmless (no second match possible) but wasteful. */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			if (raidPtr->parity_good == RF_RAID_CLEAN) {
				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
					      raidPtr->raid_cinfo[0][sparecol].ci_vp,
					      raidPtr->mod_counter);
			}
		}
	}
	/* printf("Component labels updated\n"); */
}
2299
2300 void
2301 rf_ReconThread(req)
2302 struct rf_recon_req *req;
2303 {
2304 int s;
2305 RF_Raid_t *raidPtr;
2306
2307 s = splbio();
2308 raidPtr = (RF_Raid_t *) req->raidPtr;
2309 raidPtr->recon_in_progress = 1;
2310
2311 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2312 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2313
2314 /* XXX get rid of this! we don't need it at all.. */
2315 RF_Free(req, sizeof(*req));
2316
2317 raidPtr->recon_in_progress = 0;
2318 splx(s);
2319
2320 /* That's all... */
2321 kthread_exit(0); /* does not return */
2322 }
2323
2324 void
2325 rf_RewriteParityThread(raidPtr)
2326 RF_Raid_t *raidPtr;
2327 {
2328 int retcode;
2329 int s;
2330
2331 raidPtr->parity_rewrite_in_progress = 1;
2332 s = splbio();
2333 retcode = rf_RewriteParity(raidPtr);
2334 splx(s);
2335 if (retcode) {
2336 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2337 } else {
2338 /* set the clean bit! If we shutdown correctly,
2339 the clean bit on each component label will get
2340 set */
2341 raidPtr->parity_good = RF_RAID_CLEAN;
2342 }
2343 raidPtr->parity_rewrite_in_progress = 0;
2344
2345 /* That's all... */
2346 kthread_exit(0); /* does not return */
2347 }
2348
2349
2350 void
2351 rf_CopybackThread(raidPtr)
2352 RF_Raid_t *raidPtr;
2353 {
2354 int s;
2355
2356 raidPtr->copyback_in_progress = 1;
2357 s = splbio();
2358 rf_CopybackReconstructedData(raidPtr);
2359 splx(s);
2360 raidPtr->copyback_in_progress = 0;
2361
2362 /* That's all... */
2363 kthread_exit(0); /* does not return */
2364 }
2365
2366
2367 void
2368 rf_ReconstructInPlaceThread(req)
2369 struct rf_recon_req *req;
2370 {
2371 int retcode;
2372 int s;
2373 RF_Raid_t *raidPtr;
2374
2375 s = splbio();
2376 raidPtr = req->raidPtr;
2377 raidPtr->recon_in_progress = 1;
2378 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2379 RF_Free(req, sizeof(*req));
2380 raidPtr->recon_in_progress = 0;
2381 splx(s);
2382
2383 /* That's all... */
2384 kthread_exit(0); /* does not return */
2385 }
2386