rf_netbsdkintf.c revision 1.40 1 /* $NetBSD: rf_netbsdkintf.c,v 1.40 2000/01/07 13:57:20 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
154 int rf_kdebug_level = 0;
155
156 #ifdef DEBUG
157 #define db0_printf(a) printf a
158 #define db_printf(a) if (rf_kdebug_level > 0) printf a
159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
160 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
161 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
162 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
163 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
164 #else /* DEBUG */
165 #define db0_printf(a) printf a
166 #define db1_printf(a) { }
167 #define db2_printf(a) { }
168 #define db3_printf(a) { }
169 #define db4_printf(a) { }
170 #define db5_printf(a) { }
171 #endif /* DEBUG */
172
173 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
174
175 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
176
177 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
178 * spare table */
179 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
180 * installation process */
181
182 /* prototypes */
183 static void KernelWakeupFunc(struct buf * bp);
184 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
185 dev_t dev, RF_SectorNum_t startSect,
186 RF_SectorCount_t numSect, caddr_t buf,
187 void (*cbFunc) (struct buf *), void *cbArg,
188 int logBytesPerSector, struct proc * b_proc);
189
190 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
191 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
192
193 void raidattach __P((int));
194 int raidsize __P((dev_t));
195
196 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
197 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
198 static int raidinit __P((dev_t, RF_Raid_t *, int));
199
200 int raidopen __P((dev_t, int, int, struct proc *));
201 int raidclose __P((dev_t, int, int, struct proc *));
202 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
203 int raidwrite __P((dev_t, struct uio *, int));
204 int raidread __P((dev_t, struct uio *, int));
205 void raidstrategy __P((struct buf *));
206 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
207
208 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
209 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
210 void rf_update_component_labels( RF_Raid_t *);
211 /*
212 * Pilfered from ccd.c
213 */
214
/*
 * Per-component I/O wrapper: one raidbuf is allocated (from the unit's
 * sc_cbufpool) for each component I/O issued on behalf of an original
 * request.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! (so a
				 * struct buf * can be cast back to a
				 * struct raidbuf *) */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int rf_flags;		/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
221
222
223 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
224 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
225
226 /* XXX Not sure if the following should be replacing the raidPtrs above,
227 or if it should be used in conjunction with that... */
228
/*
 * Per-unit software state for a RAID pseudo-device.  One entry per
 * configured unit lives in the raid_softc[] array, indexed by unit
 * number.
 */
struct raid_softc {
	int sc_flags;		/* flags (RAIDF_* below) */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	dev_t sc_dev;		/* our device.. */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf buf_queue;	/* used for the device queue */
};
239 /* sc_flags */
240 #define RAIDF_INITED 0x01 /* unit has been initialized */
241 #define RAIDF_WLABEL 0x02 /* label area is writable */
242 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
243 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
244 #define RAIDF_LOCKED 0x80 /* unit is locked */
245
246 #define raidunit(x) DISKUNIT(x)
247 static int numraid = 0;
248
249 /*
250 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
251 * Be aware that large numbers can allow the driver to consume a lot of
252 * kernel memory, especially on writes, and in degraded mode reads.
253 *
254 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
255 * a single 64K write will typically require 64K for the old data,
256 * 64K for the old parity, and 64K for the new parity, for a total
257 * of 192K (if the parity buffer is not re-used immediately).
258 * Even it if is used immedately, that's still 128K, which when multiplied
259 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
260 *
261 * Now in degraded mode, for example, a 64K read on the above setup may
262 * require data reconstruction, which will require *all* of the 4 remaining
263 * disks to participate -- 4 * 32K/disk == 128K again.
264 */
265
266 #ifndef RAIDOUTSTANDING
267 #define RAIDOUTSTANDING 6
268 #endif
269
270 #define RAIDLABELDEV(dev) \
271 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
272
273 /* declared here, and made public, for the benefit of KVM stuff.. */
274 struct raid_softc *raid_softc;
275
276 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
277 struct disklabel *));
278 static void raidgetdisklabel __P((dev_t));
279 static void raidmakedisklabel __P((struct raid_softc *));
280
281 static int raidlock __P((struct raid_softc *));
282 static void raidunlock __P((struct raid_softc *));
283 int raidlookup __P((char *, struct proc * p, struct vnode **));
284
285 static void rf_markalldirty __P((RF_Raid_t *));
286
287 void rf_ReconThread __P((struct rf_recon_req *));
288 /* XXX what I want is: */
289 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
290 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
291 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
292 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
293
294 void
295 raidattach(num)
296 int num;
297 {
298 int raidID;
299 int i, rc;
300
301 #ifdef DEBUG
302 printf("raidattach: Asked for %d units\n", num);
303 #endif
304
305 if (num <= 0) {
306 #ifdef DIAGNOSTIC
307 panic("raidattach: count <= 0");
308 #endif
309 return;
310 }
311 /* This is where all the initialization stuff gets done. */
312
313 /* Make some space for requested number of units... */
314
315 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
316 if (raidPtrs == NULL) {
317 panic("raidPtrs is NULL!!\n");
318 }
319
320 rc = rf_mutex_init(&rf_sparet_wait_mutex);
321 if (rc) {
322 RF_PANIC();
323 }
324
325 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
326
327 for (i = 0; i < numraid; i++)
328 raidPtrs[i] = NULL;
329 rc = rf_BootRaidframe();
330 if (rc == 0)
331 printf("Kernelized RAIDframe activated\n");
332 else
333 panic("Serious error booting RAID!!\n");
334
335 /* put together some datastructures like the CCD device does.. This
336 * lets us lock the device and what-not when it gets opened. */
337
338 raid_softc = (struct raid_softc *)
339 malloc(num * sizeof(struct raid_softc),
340 M_RAIDFRAME, M_NOWAIT);
341 if (raid_softc == NULL) {
342 printf("WARNING: no memory for RAIDframe driver\n");
343 return;
344 }
345 numraid = num;
346 bzero(raid_softc, num * sizeof(struct raid_softc));
347
348 for (raidID = 0; raidID < num; raidID++) {
349 raid_softc[raidID].buf_queue.b_actf = NULL;
350 raid_softc[raidID].buf_queue.b_actb =
351 &raid_softc[raidID].buf_queue.b_actf;
352 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
353 (RF_Raid_t *));
354 if (raidPtrs[raidID] == NULL) {
355 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
356 numraid = raidID;
357 return;
358 }
359 }
360 }
361
362
363 int
364 raidsize(dev)
365 dev_t dev;
366 {
367 struct raid_softc *rs;
368 struct disklabel *lp;
369 int part, unit, omask, size;
370
371 unit = raidunit(dev);
372 if (unit >= numraid)
373 return (-1);
374 rs = &raid_softc[unit];
375
376 if ((rs->sc_flags & RAIDF_INITED) == 0)
377 return (-1);
378
379 part = DISKPART(dev);
380 omask = rs->sc_dkdev.dk_openmask & (1 << part);
381 lp = rs->sc_dkdev.dk_label;
382
383 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
384 return (-1);
385
386 if (lp->d_partitions[part].p_fstype != FS_SWAP)
387 size = -1;
388 else
389 size = lp->d_partitions[part].p_size *
390 (lp->d_secsize / DEV_BSIZE);
391
392 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
393 return (-1);
394
395 return (size);
396
397 }
398
/*
 * Crash-dump entry point.  Dumping a kernel core to a RAID set is not
 * supported, so always fail with ENXIO.
 */
int
raiddump(dev, blkno, va, size)
	dev_t dev;
	daddr_t blkno;
	caddr_t va;
	size_t size;
{
	/* Not implemented. */
	return ENXIO;
}
409 /* ARGSUSED */
410 int
411 raidopen(dev, flags, fmt, p)
412 dev_t dev;
413 int flags, fmt;
414 struct proc *p;
415 {
416 int unit = raidunit(dev);
417 struct raid_softc *rs;
418 struct disklabel *lp;
419 int part, pmask;
420 int error = 0;
421
422 if (unit >= numraid)
423 return (ENXIO);
424 rs = &raid_softc[unit];
425
426 if ((error = raidlock(rs)) != 0)
427 return (error);
428 lp = rs->sc_dkdev.dk_label;
429
430 part = DISKPART(dev);
431 pmask = (1 << part);
432
433 db1_printf(("Opening raid device number: %d partition: %d\n",
434 unit, part));
435
436
437 if ((rs->sc_flags & RAIDF_INITED) &&
438 (rs->sc_dkdev.dk_openmask == 0))
439 raidgetdisklabel(dev);
440
441 /* make sure that this partition exists */
442
443 if (part != RAW_PART) {
444 db1_printf(("Not a raw partition..\n"));
445 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
446 ((part >= lp->d_npartitions) ||
447 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
448 error = ENXIO;
449 raidunlock(rs);
450 db1_printf(("Bailing out...\n"));
451 return (error);
452 }
453 }
454 /* Prevent this unit from being unconfigured while open. */
455 switch (fmt) {
456 case S_IFCHR:
457 rs->sc_dkdev.dk_copenmask |= pmask;
458 break;
459
460 case S_IFBLK:
461 rs->sc_dkdev.dk_bopenmask |= pmask;
462 break;
463 }
464
465 if ((rs->sc_dkdev.dk_openmask == 0) &&
466 ((rs->sc_flags & RAIDF_INITED) != 0)) {
467 /* First one... mark things as dirty... Note that we *MUST*
468 have done a configure before this. I DO NOT WANT TO BE
469 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
470 THAT THEY BELONG TOGETHER!!!!! */
471 /* XXX should check to see if we're only open for reading
472 here... If so, we needn't do this, but then need some
473 other way of keeping track of what's happened.. */
474
475 rf_markalldirty( raidPtrs[unit] );
476 }
477
478
479 rs->sc_dkdev.dk_openmask =
480 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
481
482 raidunlock(rs);
483
484 return (error);
485
486
487 }
488 /* ARGSUSED */
489 int
490 raidclose(dev, flags, fmt, p)
491 dev_t dev;
492 int flags, fmt;
493 struct proc *p;
494 {
495 int unit = raidunit(dev);
496 struct raid_softc *rs;
497 int error = 0;
498 int part;
499
500 if (unit >= numraid)
501 return (ENXIO);
502 rs = &raid_softc[unit];
503
504 if ((error = raidlock(rs)) != 0)
505 return (error);
506
507 part = DISKPART(dev);
508
509 /* ...that much closer to allowing unconfiguration... */
510 switch (fmt) {
511 case S_IFCHR:
512 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
513 break;
514
515 case S_IFBLK:
516 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
517 break;
518 }
519 rs->sc_dkdev.dk_openmask =
520 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
521
522 if ((rs->sc_dkdev.dk_openmask == 0) &&
523 ((rs->sc_flags & RAIDF_INITED) != 0)) {
524 /* Last one... device is not unconfigured yet.
525 Device shutdown has taken care of setting the
526 clean bits if RAIDF_INITED is not set
527 mark things as clean... */
528 rf_update_component_labels( raidPtrs[unit] );
529 }
530
531 raidunlock(rs);
532 return (0);
533
534 }
535
536 void
537 raidstrategy(bp)
538 register struct buf *bp;
539 {
540 register int s;
541
542 unsigned int raidID = raidunit(bp->b_dev);
543 RF_Raid_t *raidPtr;
544 struct raid_softc *rs = &raid_softc[raidID];
545 struct disklabel *lp;
546 struct buf *dp;
547 int wlabel;
548
549 if ((rs->sc_flags & RAIDF_INITED) ==0) {
550 bp->b_error = ENXIO;
551 bp->b_flags = B_ERROR;
552 bp->b_resid = bp->b_bcount;
553 biodone(bp);
554 return;
555 }
556 if (raidID >= numraid || !raidPtrs[raidID]) {
557 bp->b_error = ENODEV;
558 bp->b_flags |= B_ERROR;
559 bp->b_resid = bp->b_bcount;
560 biodone(bp);
561 return;
562 }
563 raidPtr = raidPtrs[raidID];
564 if (!raidPtr->valid) {
565 bp->b_error = ENODEV;
566 bp->b_flags |= B_ERROR;
567 bp->b_resid = bp->b_bcount;
568 biodone(bp);
569 return;
570 }
571 if (bp->b_bcount == 0) {
572 db1_printf(("b_bcount is zero..\n"));
573 biodone(bp);
574 return;
575 }
576 lp = rs->sc_dkdev.dk_label;
577
578 /*
579 * Do bounds checking and adjust transfer. If there's an
580 * error, the bounds check will flag that for us.
581 */
582
583 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
584 if (DISKPART(bp->b_dev) != RAW_PART)
585 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
586 db1_printf(("Bounds check failed!!:%d %d\n",
587 (int) bp->b_blkno, (int) wlabel));
588 biodone(bp);
589 return;
590 }
591 s = splbio();
592
593 bp->b_resid = 0;
594
595 /* stuff it onto our queue */
596
597 dp = &rs->buf_queue;
598 bp->b_actf = NULL;
599 bp->b_actb = dp->b_actb;
600 *dp->b_actb = bp;
601 dp->b_actb = &bp->b_actf;
602
603 raidstart(raidPtrs[raidID]);
604
605 splx(s);
606 }
607 /* ARGSUSED */
608 int
609 raidread(dev, uio, flags)
610 dev_t dev;
611 struct uio *uio;
612 int flags;
613 {
614 int unit = raidunit(dev);
615 struct raid_softc *rs;
616 int part;
617
618 if (unit >= numraid)
619 return (ENXIO);
620 rs = &raid_softc[unit];
621
622 if ((rs->sc_flags & RAIDF_INITED) == 0)
623 return (ENXIO);
624 part = DISKPART(dev);
625
626 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
627
628 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
629
630 }
631 /* ARGSUSED */
632 int
633 raidwrite(dev, uio, flags)
634 dev_t dev;
635 struct uio *uio;
636 int flags;
637 {
638 int unit = raidunit(dev);
639 struct raid_softc *rs;
640
641 if (unit >= numraid)
642 return (ENXIO);
643 rs = &raid_softc[unit];
644
645 if ((rs->sc_flags & RAIDF_INITED) == 0)
646 return (ENXIO);
647 db1_printf(("raidwrite\n"));
648 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
649
650 }
651
652 int
653 raidioctl(dev, cmd, data, flag, p)
654 dev_t dev;
655 u_long cmd;
656 caddr_t data;
657 int flag;
658 struct proc *p;
659 {
660 int unit = raidunit(dev);
661 int error = 0;
662 int part, pmask;
663 struct raid_softc *rs;
664 RF_Config_t *k_cfg, *u_cfg;
665 u_char *specific_buf;
666 int retcode = 0;
667 int row;
668 int column;
669 struct rf_recon_req *rrcopy, *rr;
670 RF_ComponentLabel_t *component_label;
671 RF_ComponentLabel_t ci_label;
672 RF_ComponentLabel_t **c_label_ptr;
673 RF_SingleComponent_t *sparePtr,*componentPtr;
674 RF_SingleComponent_t hot_spare;
675 RF_SingleComponent_t component;
676
677 if (unit >= numraid)
678 return (ENXIO);
679 rs = &raid_softc[unit];
680
681 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
682 (int) DISKPART(dev), (int) unit, (int) cmd));
683
684 /* Must be open for writes for these commands... */
685 switch (cmd) {
686 case DIOCSDINFO:
687 case DIOCWDINFO:
688 case DIOCWLABEL:
689 if ((flag & FWRITE) == 0)
690 return (EBADF);
691 }
692
693 /* Must be initialized for these... */
694 switch (cmd) {
695 case DIOCGDINFO:
696 case DIOCSDINFO:
697 case DIOCWDINFO:
698 case DIOCGPART:
699 case DIOCWLABEL:
700 case DIOCGDEFLABEL:
701 case RAIDFRAME_SHUTDOWN:
702 case RAIDFRAME_REWRITEPARITY:
703 case RAIDFRAME_GET_INFO:
704 case RAIDFRAME_RESET_ACCTOTALS:
705 case RAIDFRAME_GET_ACCTOTALS:
706 case RAIDFRAME_KEEP_ACCTOTALS:
707 case RAIDFRAME_GET_SIZE:
708 case RAIDFRAME_FAIL_DISK:
709 case RAIDFRAME_COPYBACK:
710 case RAIDFRAME_CHECK_RECON_STATUS:
711 case RAIDFRAME_GET_COMPONENT_LABEL:
712 case RAIDFRAME_SET_COMPONENT_LABEL:
713 case RAIDFRAME_ADD_HOT_SPARE:
714 case RAIDFRAME_REMOVE_HOT_SPARE:
715 case RAIDFRAME_INIT_LABELS:
716 case RAIDFRAME_REBUILD_IN_PLACE:
717 case RAIDFRAME_CHECK_PARITY:
718 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
719 case RAIDFRAME_CHECK_COPYBACK_STATUS:
720 if ((rs->sc_flags & RAIDF_INITED) == 0)
721 return (ENXIO);
722 }
723
724 switch (cmd) {
725
726
727 /* configure the system */
728 case RAIDFRAME_CONFIGURE:
729
730 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
731 /* copy-in the configuration information */
732 /* data points to a pointer to the configuration structure */
733 u_cfg = *((RF_Config_t **) data);
734 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
735 if (k_cfg == NULL) {
736 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
737 return (ENOMEM);
738 }
739 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
740 sizeof(RF_Config_t));
741 if (retcode) {
742 RF_Free(k_cfg, sizeof(RF_Config_t));
743 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
744 retcode));
745 return (retcode);
746 }
747 /* allocate a buffer for the layout-specific data, and copy it
748 * in */
749 if (k_cfg->layoutSpecificSize) {
750 if (k_cfg->layoutSpecificSize > 10000) {
751 /* sanity check */
752 RF_Free(k_cfg, sizeof(RF_Config_t));
753 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
754 return (EINVAL);
755 }
756 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
757 (u_char *));
758 if (specific_buf == NULL) {
759 RF_Free(k_cfg, sizeof(RF_Config_t));
760 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
761 return (ENOMEM);
762 }
763 retcode = copyin(k_cfg->layoutSpecific,
764 (caddr_t) specific_buf,
765 k_cfg->layoutSpecificSize);
766 if (retcode) {
767 RF_Free(k_cfg, sizeof(RF_Config_t));
768 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
769 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
770 retcode));
771 return (retcode);
772 }
773 } else
774 specific_buf = NULL;
775 k_cfg->layoutSpecific = specific_buf;
776
777 /* should do some kind of sanity check on the configuration.
778 * Store the sum of all the bytes in the last byte? */
779
780 /* configure the system */
781
782 raidPtrs[unit]->raidid = unit;
783
784 retcode = rf_Configure(raidPtrs[unit], k_cfg);
785
786 if (retcode == 0) {
787
788 /* allow this many simultaneous IO's to
789 this RAID device */
790 raidPtrs[unit]->openings = RAIDOUTSTANDING;
791
792 /* XXX should be moved to rf_Configure() */
793
794 raidPtrs[unit]->copyback_in_progress = 0;
795 raidPtrs[unit]->parity_rewrite_in_progress = 0;
796 raidPtrs[unit]->recon_in_progress = 0;
797
798 retcode = raidinit(dev, raidPtrs[unit], unit);
799 rf_markalldirty( raidPtrs[unit] );
800 }
801 /* free the buffers. No return code here. */
802 if (k_cfg->layoutSpecificSize) {
803 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
804 }
805 RF_Free(k_cfg, sizeof(RF_Config_t));
806
807 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
808 retcode));
809
810 return (retcode);
811
812 /* shutdown the system */
813 case RAIDFRAME_SHUTDOWN:
814
815 if ((error = raidlock(rs)) != 0)
816 return (error);
817
818 /*
819 * If somebody has a partition mounted, we shouldn't
820 * shutdown.
821 */
822
823 part = DISKPART(dev);
824 pmask = (1 << part);
825 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
826 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
827 (rs->sc_dkdev.dk_copenmask & pmask))) {
828 raidunlock(rs);
829 return (EBUSY);
830 }
831
832 retcode = rf_Shutdown(raidPtrs[unit]);
833
834 pool_destroy(&rs->sc_cbufpool);
835
836 /* It's no longer initialized... */
837 rs->sc_flags &= ~RAIDF_INITED;
838
839 /* Detach the disk. */
840 disk_detach(&rs->sc_dkdev);
841
842 raidunlock(rs);
843
844 return (retcode);
845 case RAIDFRAME_GET_COMPONENT_LABEL:
846 c_label_ptr = (RF_ComponentLabel_t **) data;
847 /* need to read the component label for the disk indicated
848 by row,column in component_label
849 XXX need to sanity check these values!!!
850 */
851
852 /* For practice, let's get it directly fromdisk, rather
853 than from the in-core copy */
854 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
855 (RF_ComponentLabel_t *));
856 if (component_label == NULL)
857 return (ENOMEM);
858
859 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
860
861 retcode = copyin( *c_label_ptr, component_label,
862 sizeof(RF_ComponentLabel_t));
863
864 if (retcode) {
865 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
866 return(retcode);
867 }
868
869 row = component_label->row;
870 column = component_label->column;
871
872 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
873 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
874 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
875 return(EINVAL);
876 }
877
878 raidread_component_label(
879 raidPtrs[unit]->Disks[row][column].dev,
880 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
881 component_label );
882
883 retcode = copyout((caddr_t) component_label,
884 (caddr_t) *c_label_ptr,
885 sizeof(RF_ComponentLabel_t));
886 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
887 return (retcode);
888
889 case RAIDFRAME_SET_COMPONENT_LABEL:
890 component_label = (RF_ComponentLabel_t *) data;
891
892 /* XXX check the label for valid stuff... */
893 /* Note that some things *should not* get modified --
894 the user should be re-initing the labels instead of
895 trying to patch things.
896 */
897
898 printf("Got component label:\n");
899 printf("Version: %d\n",component_label->version);
900 printf("Serial Number: %d\n",component_label->serial_number);
901 printf("Mod counter: %d\n",component_label->mod_counter);
902 printf("Row: %d\n", component_label->row);
903 printf("Column: %d\n", component_label->column);
904 printf("Num Rows: %d\n", component_label->num_rows);
905 printf("Num Columns: %d\n", component_label->num_columns);
906 printf("Clean: %d\n", component_label->clean);
907 printf("Status: %d\n", component_label->status);
908
909 row = component_label->row;
910 column = component_label->column;
911
912 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
913 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
914 return(EINVAL);
915 }
916
917 /* XXX this isn't allowed to do anything for now :-) */
918 #if 0
919 raidwrite_component_label(
920 raidPtrs[unit]->Disks[row][column].dev,
921 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
922 component_label );
923 #endif
924 return (0);
925
926 case RAIDFRAME_INIT_LABELS:
927 component_label = (RF_ComponentLabel_t *) data;
928 /*
929 we only want the serial number from
930 the above. We get all the rest of the information
931 from the config that was used to create this RAID
932 set.
933 */
934
935 raidPtrs[unit]->serial_number = component_label->serial_number;
936 /* current version number */
937 ci_label.version = RF_COMPONENT_LABEL_VERSION;
938 ci_label.serial_number = component_label->serial_number;
939 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
940 ci_label.num_rows = raidPtrs[unit]->numRow;
941 ci_label.num_columns = raidPtrs[unit]->numCol;
942 ci_label.clean = RF_RAID_DIRTY; /* not clean */
943 ci_label.status = rf_ds_optimal; /* "It's good!" */
944
945 for(row=0;row<raidPtrs[unit]->numRow;row++) {
946 ci_label.row = row;
947 for(column=0;column<raidPtrs[unit]->numCol;column++) {
948 ci_label.column = column;
949 raidwrite_component_label(
950 raidPtrs[unit]->Disks[row][column].dev,
951 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
952 &ci_label );
953 }
954 }
955
956 return (retcode);
957
958 /* initialize all parity */
959 case RAIDFRAME_REWRITEPARITY:
960
961 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
962 /* Parity for RAID 0 is trivially correct */
963 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
964 return(0);
965 }
966
967 if (raidPtrs[unit]->parity_rewrite_in_progress == 1) {
968 /* Re-write is already in progress! */
969 return(EINVAL);
970 }
971
972 /* borrow the thread of the requesting process */
973
974 retcode = RF_CREATE_THREAD(raidPtrs[unit]->parity_rewrite_thread,
975 rf_RewriteParityThread,
976 raidPtrs[unit],"raid_parity");
977 return (retcode);
978
979
980 case RAIDFRAME_ADD_HOT_SPARE:
981 sparePtr = (RF_SingleComponent_t *) data;
982 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
983 printf("Adding spare\n");
984 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
985 return(retcode);
986
987 case RAIDFRAME_REMOVE_HOT_SPARE:
988 return(retcode);
989
990 case RAIDFRAME_REBUILD_IN_PLACE:
991
992 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
993 /* Can't do this on a RAID 0!! */
994 return(EINVAL);
995 }
996
997 if (raidPtrs[unit]->recon_in_progress == 1) {
998 /* a reconstruct is already in progress! */
999 return(EINVAL);
1000 }
1001
1002 componentPtr = (RF_SingleComponent_t *) data;
1003 memcpy( &component, componentPtr,
1004 sizeof(RF_SingleComponent_t));
1005 row = component.row;
1006 column = component.column;
1007 printf("Rebuild: %d %d\n",row, column);
1008 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
1009 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
1010 return(EINVAL);
1011 }
1012
1013 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1014 if (rrcopy == NULL)
1015 return(ENOMEM);
1016
1017 rrcopy->raidPtr = (void *) raidPtrs[unit];
1018 rrcopy->row = row;
1019 rrcopy->col = column;
1020
1021 retcode = RF_CREATE_THREAD(raidPtrs[unit]->recon_thread,
1022 rf_ReconstructInPlaceThread,
1023 rrcopy,"raid_reconip");
1024 return(retcode);
1025
1026 case RAIDFRAME_GET_INFO:
1027 {
1028 RF_Raid_t *raid = raidPtrs[unit];
1029 RF_DeviceConfig_t *cfg, **ucfgp;
1030 int i, j, d;
1031
1032 if (!raid->valid)
1033 return (ENODEV);
1034 ucfgp = (RF_DeviceConfig_t **) data;
1035 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1036 (RF_DeviceConfig_t *));
1037 if (cfg == NULL)
1038 return (ENOMEM);
1039 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1040 cfg->rows = raid->numRow;
1041 cfg->cols = raid->numCol;
1042 cfg->ndevs = raid->numRow * raid->numCol;
1043 if (cfg->ndevs >= RF_MAX_DISKS) {
1044 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1045 return (ENOMEM);
1046 }
1047 cfg->nspares = raid->numSpare;
1048 if (cfg->nspares >= RF_MAX_DISKS) {
1049 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1050 return (ENOMEM);
1051 }
1052 cfg->maxqdepth = raid->maxQueueDepth;
1053 d = 0;
1054 for (i = 0; i < cfg->rows; i++) {
1055 for (j = 0; j < cfg->cols; j++) {
1056 cfg->devs[d] = raid->Disks[i][j];
1057 d++;
1058 }
1059 }
1060 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1061 cfg->spares[i] = raid->Disks[0][j];
1062 }
1063 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1064 sizeof(RF_DeviceConfig_t));
1065 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1066
1067 return (retcode);
1068 }
1069 break;
1070 case RAIDFRAME_CHECK_PARITY:
1071 *(int *) data = raidPtrs[unit]->parity_good;
1072 return (0);
1073 case RAIDFRAME_RESET_ACCTOTALS:
1074 {
1075 RF_Raid_t *raid = raidPtrs[unit];
1076
1077 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1078 return (0);
1079 }
1080 break;
1081
1082 case RAIDFRAME_GET_ACCTOTALS:
1083 {
1084 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1085 RF_Raid_t *raid = raidPtrs[unit];
1086
1087 *totals = raid->acc_totals;
1088 return (0);
1089 }
1090 break;
1091
1092 case RAIDFRAME_KEEP_ACCTOTALS:
1093 {
1094 RF_Raid_t *raid = raidPtrs[unit];
1095 int *keep = (int *) data;
1096
1097 raid->keep_acc_totals = *keep;
1098 return (0);
1099 }
1100 break;
1101
1102 case RAIDFRAME_GET_SIZE:
1103 *(int *) data = raidPtrs[unit]->totalSectors;
1104 return (0);
1105
1106 /* fail a disk & optionally start reconstruction */
1107 case RAIDFRAME_FAIL_DISK:
1108
1109 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1110 /* Can't do this on a RAID 0!! */
1111 return(EINVAL);
1112 }
1113
1114 rr = (struct rf_recon_req *) data;
1115
1116 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1117 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1118 return (EINVAL);
1119
1120 printf("raid%d: Failing the disk: row: %d col: %d\n",
1121 unit, rr->row, rr->col);
1122
1123 /* make a copy of the recon request so that we don't rely on
1124 * the user's buffer */
1125 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1126 if (rrcopy == NULL)
1127 return(ENOMEM);
1128 bcopy(rr, rrcopy, sizeof(*rr));
1129 rrcopy->raidPtr = (void *) raidPtrs[unit];
1130
1131 retcode = RF_CREATE_THREAD(raidPtrs[unit]->recon_thread,
1132 rf_ReconThread,
1133 rrcopy,"raid_recon");
1134 return (0);
1135
1136 /* invoke a copyback operation after recon on whatever disk
1137 * needs it, if any */
1138 case RAIDFRAME_COPYBACK:
1139
1140 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1141 /* This makes no sense on a RAID 0!! */
1142 return(EINVAL);
1143 }
1144
1145 if (raidPtrs[unit]->copyback_in_progress == 1) {
1146 /* Copyback is already in progress! */
1147 return(EINVAL);
1148 }
1149
1150 retcode = RF_CREATE_THREAD(raidPtrs[unit]->copyback_thread,
1151 rf_CopybackThread,
1152 raidPtrs[unit],"raid_copyback");
1153 return (retcode);
1154
1155 /* return the percentage completion of reconstruction */
1156 case RAIDFRAME_CHECK_RECON_STATUS:
1157 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1158 /* This makes no sense on a RAID 0 */
1159 return(EINVAL);
1160 }
1161 row = 0; /* XXX we only consider a single row... */
1162 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1163 *(int *) data = 100;
1164 else
1165 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1166 return (0);
1167
1168 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1169 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1170 /* This makes no sense on a RAID 0 */
1171 return(EINVAL);
1172 }
1173 if (raidPtrs[unit]->parity_rewrite_in_progress == 1) {
1174 *(int *) data = 100 * raidPtrs[unit]->parity_rewrite_stripes_done / raidPtrs[unit]->Layout.numStripe;
1175 } else {
1176 *(int *) data = 100;
1177 }
1178 return (0);
1179
1180 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1181 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1182 /* This makes no sense on a RAID 0 */
1183 return(EINVAL);
1184 }
1185 if (raidPtrs[unit]->copyback_in_progress == 1) {
1186 *(int *) data = 100 * raidPtrs[unit]->copyback_stripes_done / raidPtrs[unit]->Layout.numStripe;
1187 } else {
1188 *(int *) data = 100;
1189 }
1190 return (0);
1191
1192
1193 /* the sparetable daemon calls this to wait for the kernel to
1194 * need a spare table. this ioctl does not return until a
1195 * spare table is needed. XXX -- calling mpsleep here in the
1196 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1197 * -- I should either compute the spare table in the kernel,
1198 * or have a different -- XXX XXX -- interface (a different
1199 * character device) for delivering the table -- XXX */
1200 #if 0
1201 case RAIDFRAME_SPARET_WAIT:
1202 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1203 while (!rf_sparet_wait_queue)
1204 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1205 waitreq = rf_sparet_wait_queue;
1206 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1207 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1208
1209 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1210
1211 RF_Free(waitreq, sizeof(*waitreq));
1212 return (0);
1213
1214
1215 /* wakes up a process waiting on SPARET_WAIT and puts an error
1216 * code in it that will cause the dameon to exit */
1217 case RAIDFRAME_ABORT_SPARET_WAIT:
1218 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1219 waitreq->fcol = -1;
1220 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1221 waitreq->next = rf_sparet_wait_queue;
1222 rf_sparet_wait_queue = waitreq;
1223 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1224 wakeup(&rf_sparet_wait_queue);
1225 return (0);
1226
1227 /* used by the spare table daemon to deliver a spare table
1228 * into the kernel */
1229 case RAIDFRAME_SEND_SPARET:
1230
1231 /* install the spare table */
1232 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1233
1234 /* respond to the requestor. the return status of the spare
1235 * table installation is passed in the "fcol" field */
1236 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1237 waitreq->fcol = retcode;
1238 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1239 waitreq->next = rf_sparet_resp_queue;
1240 rf_sparet_resp_queue = waitreq;
1241 wakeup(&rf_sparet_resp_queue);
1242 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1243
1244 return (retcode);
1245 #endif
1246
1247 default:
1248 break; /* fall through to the os-specific code below */
1249
1250 }
1251
1252 if (!raidPtrs[unit]->valid)
1253 return (EINVAL);
1254
1255 /*
1256 * Add support for "regular" device ioctls here.
1257 */
1258
1259 switch (cmd) {
1260 case DIOCGDINFO:
1261 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1262 break;
1263
1264 case DIOCGPART:
1265 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1266 ((struct partinfo *) data)->part =
1267 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1268 break;
1269
1270 case DIOCWDINFO:
1271 case DIOCSDINFO:
1272 if ((error = raidlock(rs)) != 0)
1273 return (error);
1274
1275 rs->sc_flags |= RAIDF_LABELLING;
1276
1277 error = setdisklabel(rs->sc_dkdev.dk_label,
1278 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1279 if (error == 0) {
1280 if (cmd == DIOCWDINFO)
1281 error = writedisklabel(RAIDLABELDEV(dev),
1282 raidstrategy, rs->sc_dkdev.dk_label,
1283 rs->sc_dkdev.dk_cpulabel);
1284 }
1285 rs->sc_flags &= ~RAIDF_LABELLING;
1286
1287 raidunlock(rs);
1288
1289 if (error)
1290 return (error);
1291 break;
1292
1293 case DIOCWLABEL:
1294 if (*(int *) data != 0)
1295 rs->sc_flags |= RAIDF_WLABEL;
1296 else
1297 rs->sc_flags &= ~RAIDF_WLABEL;
1298 break;
1299
1300 case DIOCGDEFLABEL:
1301 raidgetdefaultlabel(raidPtrs[unit], rs,
1302 (struct disklabel *) data);
1303 break;
1304
1305 default:
1306 retcode = ENOTTY;
1307 }
1308 return (retcode);
1309
1310 }
1311
1312
1313 /* raidinit -- complete the rest of the initialization for the
1314 RAIDframe device. */
1315
1316
static int
raidinit(dev, raidPtr, unit)
	dev_t dev;
	RF_Raid_t *raidPtr;
	int unit;
{
	int retcode;
	struct raid_softc *rs;

	retcode = 0;	/* never modified below; this always returns 0 */

	rs = &raid_softc[unit];

	/* Pool of raidbuf structures used for component I/O
	 * (see RAIDGETBUF/RAIDPUTBUF). */
	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
	    0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */

	/* the disk framework identifies this disk by sc_xname */
	rs->sc_dkdev.dk_name = rs->sc_xname;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe. */

	/* size of the raid device, in sectors */
	rs->sc_size = raidPtr->totalSectors;
	rs->sc_dev = dev;

	return (retcode);
}
1354
1355 /* wake up the daemon & tell it to get us a spare table
1356 * XXX
1357 * the entries in the queues should be tagged with the raidPtr
1358 * so that in the extremely rare case that two recons happen at once,
1359 * we know for which device were requesting a spare table
1360 * XXX
1361 *
1362 * XXX This code is not currently used. GO
1363 */
/*
 * Enqueue a spare-table request for the user-level daemon and block
 * until the daemon posts a response on rf_sparet_resp_queue.
 * Returns the status the daemon passed back in the response's fcol
 * field.  The response structure (not the caller's req) is freed here.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	/* push the request onto the wait queue and poke the daemon */
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): unlike the mpsleep() referred to above, tsleep()
	 * does not release rf_sparet_wait_mutex -- confirm that sleeping
	 * while holding this mutex is safe here. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* pop the daemon's response off the response queue */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;	/* daemon reports its status in fcol */
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1389
1390 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1391 * bp & passes it down.
1392 * any calls originating in the kernel must use non-blocking I/O
1393 * do some extra sanity checking to return "appropriate" error values for
1394 * certain conditions (to make some standard utilities work)
1395 *
1396 * Formerly known as: rf_DoAccessKernel
1397 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	struct buf *dp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Check to see if we're at the limit... */
	RF_LOCK_MUTEX(raidPtr->mutex);
	/* the mutex only guards the openings check; it is dropped while
	 * we dequeue and dispatch, and re-taken before each re-check */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		dp = &rs->buf_queue;
		bp = dp->b_actf;
		if (bp == NULL) {
			/* nothing more to do */
			return;
		}

		/* update structures */
		/* unlink bp from the doubly-linked b_actf/b_actb queue */
		dp = bp->b_actf;
		if (dp != NULL) {
			dp->b_actb = bp->b_actb;
		} else {
			/* bp was the tail; queue head's back-pointer moves */
			rs->buf_queue.b_actb = bp->b_actb;
		}
		*bp->b_actb = dp;

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* whole sectors, plus one partial sector if byte count is
		 * not sector-aligned */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/* reject I/O that runs off the end of the array; the
		 * "sum <" comparisons also catch arithmetic wrap-around */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject requests that are not a whole number of sectors */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* consume one opening for this in-flight access */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */


		/* NOTE(review): retcode is never examined; errors are
		 * presumably delivered via the completion path -- confirm. */
		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
		    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
		    do_async, raid_addr, num_blocks,
		    bp->b_un.b_addr, bp, NULL, NULL,
		    RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);


		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1518
1519
1520
1521
1522 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1523
/*
 * Dispatch one RAIDframe disk-queue request (req) to the underlying
 * component driver via VOP_STRATEGY(), or synthesize completion for a
 * NOP request.  Always returns 0; I/O completion is reported through
 * KernelWakeupFunc().
 */
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;
	int s;

	s=0;	/* s is unused while the splbio()/splx() calls are disabled */
	/* s = splbio();*/ /* want to test this */
	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	/* remember which queue this request belongs to, for the callback */
	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!!  Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* per-component buf, taken from the softc's pool */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* fake an immediate completion for the NOP */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* set up the component buf; KernelWakeupFunc fires on
		 * completion */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			/* writes must be accounted for on the vnode */
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	/* splx(s); */ /* want to test this */
	return (0);
}
1638 /* this is the callback function associated with a I/O invoked from
1639 kernel code.
1640 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;	/* really the raidbuf wrapping the component buf */
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;	/* the original buf this I/O was part of */

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate a component-level error to the original buf */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	if (req->tracerec) {
		/* charge the elapsed time of this physical I/O to the
		 * access's trace record */
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* return the raidbuf wrapper to the softc's pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	/* NOTE(review): disk_unbusy() only runs when the transfer completed
	 * fully (b_resid == 0) -- confirm partial/failed transfers should
	 * not also unbusy the disk. */
	if (bp->b_resid == 0) {
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	}

	/* tell RAIDframe this queue entry is done, then fire the access's
	 * completion function (second arg is nonzero on error) */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);

	splx(s);
}
1721
1722
1723
1724 /*
1725 * initialize a buf structure for doing an I/O in the kernel.
1726 */
1727 static void
1728 InitBP(
1729 struct buf * bp,
1730 struct vnode * b_vp,
1731 unsigned rw_flag,
1732 dev_t dev,
1733 RF_SectorNum_t startSect,
1734 RF_SectorCount_t numSect,
1735 caddr_t buf,
1736 void (*cbFunc) (struct buf *),
1737 void *cbArg,
1738 int logBytesPerSector,
1739 struct proc * b_proc)
1740 {
1741 /* bp->b_flags = B_PHYS | rw_flag; */
1742 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1743 bp->b_bcount = numSect << logBytesPerSector;
1744 bp->b_bufsize = bp->b_bcount;
1745 bp->b_error = 0;
1746 bp->b_dev = dev;
1747 bp->b_un.b_addr = buf;
1748 bp->b_blkno = startSect;
1749 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1750 if (bp->b_bcount == 0) {
1751 panic("bp->b_bcount is zero in InitBP!!\n");
1752 }
1753 bp->b_proc = b_proc;
1754 bp->b_iodone = cbFunc;
1755 bp->b_vp = b_vp;
1756
1757 }
1758
1759 static void
1760 raidgetdefaultlabel(raidPtr, rs, lp)
1761 RF_Raid_t *raidPtr;
1762 struct raid_softc *rs;
1763 struct disklabel *lp;
1764 {
1765 db1_printf(("Building a default label...\n"));
1766 bzero(lp, sizeof(*lp));
1767
1768 /* fabricate a label... */
1769 lp->d_secperunit = raidPtr->totalSectors;
1770 lp->d_secsize = raidPtr->bytesPerSector;
1771 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1772 lp->d_ntracks = 1;
1773 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1774 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1775
1776 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1777 lp->d_type = DTYPE_RAID;
1778 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1779 lp->d_rpm = 3600;
1780 lp->d_interleave = 1;
1781 lp->d_flags = 0;
1782
1783 lp->d_partitions[RAW_PART].p_offset = 0;
1784 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1785 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1786 lp->d_npartitions = RAW_PART + 1;
1787
1788 lp->d_magic = DISKMAGIC;
1789 lp->d_magic2 = DISKMAGIC;
1790 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1791
1792 }
1793 /*
1794 * Read the disklabel from the raid device. If one is not present, fake one
1795 * up.
1796 */
static void
raidgetdisklabel(dev)
	dev_t dev;
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a fabricated default label... */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* no usable on-disk label: synthesize one */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same componets are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		/* warn about any partition extending past the array */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
1850 /*
1851 * Take care of things one might want to take care of in the event
1852 * that a disklabel isn't present.
1853 */
1854 static void
1855 raidmakedisklabel(rs)
1856 struct raid_softc *rs;
1857 {
1858 struct disklabel *lp = rs->sc_dkdev.dk_label;
1859 db1_printf(("Making a label..\n"));
1860
1861 /*
1862 * For historical reasons, if there's no disklabel present
1863 * the raw partition must be marked FS_BSDFFS.
1864 */
1865
1866 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1867
1868 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1869
1870 lp->d_checksum = dkcksum(lp);
1871 }
1872 /*
1873 * Lookup the provided name in the filesystem. If the file exists,
1874 * is a valid block device, and isn't being used by anyone else,
1875 * set *vpp to the file's vnode.
1876 * You'll find the original of this in ccd.c
1877 */
1878 int
1879 raidlookup(path, p, vpp)
1880 char *path;
1881 struct proc *p;
1882 struct vnode **vpp; /* result */
1883 {
1884 struct nameidata nd;
1885 struct vnode *vp;
1886 struct vattr va;
1887 int error;
1888
1889 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1890 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1891 #ifdef DEBUG
1892 printf("RAIDframe: vn_open returned %d\n", error);
1893 #endif
1894 return (error);
1895 }
1896 vp = nd.ni_vp;
1897 if (vp->v_usecount > 1) {
1898 VOP_UNLOCK(vp, 0);
1899 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1900 return (EBUSY);
1901 }
1902 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1903 VOP_UNLOCK(vp, 0);
1904 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1905 return (error);
1906 }
1907 /* XXX: eventually we should handle VREG, too. */
1908 if (va.va_type != VBLK) {
1909 VOP_UNLOCK(vp, 0);
1910 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1911 return (ENOTBLK);
1912 }
1913 VOP_UNLOCK(vp, 0);
1914 *vpp = vp;
1915 return (0);
1916 }
1917 /*
1918 * Wait interruptibly for an exclusive lock.
1919 *
1920 * XXX
1921 * Several drivers do this; it should be abstracted and made MP-safe.
1922 * (Hmm... where have we seen this warning before :-> GO )
1923 */
1924 static int
1925 raidlock(rs)
1926 struct raid_softc *rs;
1927 {
1928 int error;
1929
1930 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1931 rs->sc_flags |= RAIDF_WANTED;
1932 if ((error =
1933 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1934 return (error);
1935 }
1936 rs->sc_flags |= RAIDF_LOCKED;
1937 return (0);
1938 }
1939 /*
1940 * Unlock and wake up any waiters.
1941 */
1942 static void
1943 raidunlock(rs)
1944 struct raid_softc *rs;
1945 {
1946
1947 rs->sc_flags &= ~RAIDF_LOCKED;
1948 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
1949 rs->sc_flags &= ~RAIDF_WANTED;
1950 wakeup(rs);
1951 }
1952 }
1953
1954
1955 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
1956 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
1957
1958 int
1959 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
1960 {
1961 RF_ComponentLabel_t component_label;
1962 raidread_component_label(dev, b_vp, &component_label);
1963 component_label.mod_counter = mod_counter;
1964 component_label.clean = RF_RAID_CLEAN;
1965 raidwrite_component_label(dev, b_vp, &component_label);
1966 return(0);
1967 }
1968
1969
1970 int
1971 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
1972 {
1973 RF_ComponentLabel_t component_label;
1974 raidread_component_label(dev, b_vp, &component_label);
1975 component_label.mod_counter = mod_counter;
1976 component_label.clean = RF_RAID_DIRTY;
1977 raidwrite_component_label(dev, b_vp, &component_label);
1978 return(0);
1979 }
1980
1981 /* ARGSUSED */
/*
 * Read the RAIDframe component label of `dev' into *component_label.
 * The label lives RF_COMPONENT_INFO_OFFSET bytes into the component.
 * Returns 0 on success, else the error from biowait().
 * (b_vp is unused here -- hence the ARGSUSED.)
 */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_resid is set in DEV_BSIZE units here even though
	 * b_resid is normally a byte count -- confirm this is intended. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* hand the buffer straight to the component's block driver */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* mark the buffer stale so it isn't cached, then release it */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
2031 /* ARGSUSED */
/*
 * Write *component_label to the component-label area of `dev'
 * (RF_COMPONENT_INFO_OFFSET bytes into the component).  Returns 0 on
 * success, else the error from biowait().
 * (b_vp is unused here -- hence the ARGSUSED.)
 */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	/* NOTE(review): b_resid is set in DEV_BSIZE units here even though
	 * b_resid is normally a byte count -- confirm this is intended. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero the whole label area, then place the label at its start */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* hand the buffer straight to the component's block driver */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* mark the buffer stale so it isn't cached, then release it */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2065
/*
 * Mark the component labels of every non-failed, non-spared component
 * of the array as dirty, stamping each with a freshly-bumped
 * modification counter.  Called so that an unclean shutdown can be
 * detected later.
 */
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	/* every label written below carries this new counter value */
	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			/* don't touch components that have failed */
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
#if 0
	/* NOTE(review): this disabled spare-handling code references
	 * variables (sparecol, i, j, srow, scol, r) that are not declared
	 * or not set up in this function -- it cannot simply be
	 * re-enabled as-is. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				raidPtr->Disks[r][sparecol].dev,
				raidPtr->raid_cinfo[r][sparecol].ci_vp,
				&c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				raidPtr->Disks[r][sparecol].dev,
				raidPtr->raid_cinfo[r][sparecol].ci_vp,
				&c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
2160
2161
/*
 * Re-write the component labels of all optimal components (and of any
 * in-use spares) with the array's new modification counter, and -- if
 * the parity is known good -- mark those labels clean.  Failed
 * components are not touched.
 */
void
rf_update_component_labels( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	/* -1 == "no mapping found"; overwritten when a spare is matched */
	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				/* read-modify-write: only status changes here;
				   mod_counter is stamped by raidmarkclean() */
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status = rf_ds_optimal;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
						      raidPtr->Disks[r][c].dev,
						      raidPtr->raid_cinfo[r][c].ci_vp,
						      raidPtr->mod_counter);
				}
			}
			/* else we don't touch it.. */
#if 0
			else if (raidPtr->Disks[r][c].status !=
				 rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
						      raidPtr->Disks[r][c].dev,
						      raidPtr->raid_cinfo[r][c].ci_vp,
						      raidPtr->mod_counter);
				}
			}
#endif
		}
	}

	/* spares live in row 0 only (columns numCol..numCol+numSpare-1);
	   NOTE(review): only row 0 is checked here -- presumably spares are
	   always attached to row 0; confirm against the spare-add path */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which (row,col) this spare stands in for;
			   NOTE(review): "break" only leaves the inner loop,
			   so the outer loop keeps scanning after a match --
			   harmless, but slightly wasteful */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			/* label carries the position the spare replaced,
			   not its physical column */
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			if (raidPtr->parity_good == RF_RAID_CLEAN) {
				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
					       raidPtr->raid_cinfo[0][sparecol].ci_vp,
					       raidPtr->mod_counter);
			}
		}
	}
	/* printf("Component labels updated\n"); */
}
2278
2279 void
2280 rf_ReconThread(req)
2281 struct rf_recon_req *req;
2282 {
2283 int s;
2284 RF_Raid_t *raidPtr;
2285
2286 s = splbio();
2287 raidPtr = (RF_Raid_t *) req->raidPtr;
2288 raidPtr->recon_in_progress = 1;
2289
2290 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2291 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2292
2293 /* XXX get rid of this! we don't need it at all.. */
2294 RF_Free(req, sizeof(*req));
2295
2296 raidPtr->recon_in_progress = 0;
2297 splx(s);
2298
2299 /* That's all... */
2300 kthread_exit(0); /* does not return */
2301 }
2302
2303 void
2304 rf_RewriteParityThread(raidPtr)
2305 RF_Raid_t *raidPtr;
2306 {
2307 int retcode;
2308 int s;
2309
2310 raidPtr->parity_rewrite_in_progress = 1;
2311 s = splbio();
2312 retcode = rf_RewriteParity(raidPtr);
2313 splx(s);
2314 if (retcode) {
2315 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2316 } else {
2317 /* set the clean bit! If we shutdown correctly,
2318 the clean bit on each component label will get
2319 set */
2320 raidPtr->parity_good = RF_RAID_CLEAN;
2321 }
2322 raidPtr->parity_rewrite_in_progress = 0;
2323
2324 /* That's all... */
2325 kthread_exit(0); /* does not return */
2326 }
2327
2328
2329 void
2330 rf_CopybackThread(raidPtr)
2331 RF_Raid_t *raidPtr;
2332 {
2333 int s;
2334
2335 raidPtr->copyback_in_progress = 1;
2336 s = splbio();
2337 rf_CopybackReconstructedData(raidPtr);
2338 splx(s);
2339 raidPtr->copyback_in_progress = 0;
2340
2341 /* That's all... */
2342 kthread_exit(0); /* does not return */
2343 }
2344
2345
2346 void
2347 rf_ReconstructInPlaceThread(req)
2348 struct rf_recon_req *req;
2349 {
2350 int retcode;
2351 int s;
2352 RF_Raid_t *raidPtr;
2353
2354 s = splbio();
2355 raidPtr = req->raidPtr;
2356 raidPtr->recon_in_progress = 1;
2357 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2358 RF_Free(req, sizeof(*req));
2359 raidPtr->recon_in_progress = 0;
2360 splx(s);
2361
2362 /* That's all... */
2363 kthread_exit(0); /* does not return */
2364 }
2365