/*	$NetBSD: rf_netbsdkintf.c,v 1.41 2000/01/08 01:37:37 oster Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
154 int rf_kdebug_level = 0;
155
156 #ifdef DEBUG
157 #define db0_printf(a) printf a
158 #define db_printf(a) if (rf_kdebug_level > 0) printf a
159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
160 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
161 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
162 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
163 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
164 #else /* DEBUG */
165 #define db0_printf(a) printf a
166 #define db1_printf(a) { }
167 #define db2_printf(a) { }
168 #define db3_printf(a) { }
169 #define db4_printf(a) { }
170 #define db5_printf(a) { }
171 #endif /* DEBUG */
172
173 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
174
175 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
176
177 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
178 * spare table */
179 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
180 * installation process */
181
182 /* prototypes */
183 static void KernelWakeupFunc(struct buf * bp);
184 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
185 dev_t dev, RF_SectorNum_t startSect,
186 RF_SectorCount_t numSect, caddr_t buf,
187 void (*cbFunc) (struct buf *), void *cbArg,
188 int logBytesPerSector, struct proc * b_proc);
189
190 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
191 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
192
193 void raidattach __P((int));
194 int raidsize __P((dev_t));
195
196 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
197 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
198 static int raidinit __P((dev_t, RF_Raid_t *, int));
199
200 int raidopen __P((dev_t, int, int, struct proc *));
201 int raidclose __P((dev_t, int, int, struct proc *));
202 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
203 int raidwrite __P((dev_t, struct uio *, int));
204 int raidread __P((dev_t, struct uio *, int));
205 void raidstrategy __P((struct buf *));
206 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
207
208 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
209 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
210 void rf_update_component_labels( RF_Raid_t *);
211 /*
212 * Pilfered from ccd.c
213 */
214
/*
 * Per-component I/O wrapper: pairs the buf handed to the underlying
 * component device with the original buf and the RAIDframe request it
 * services.  Allocated from sc_cbufpool via RAIDGETBUF.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! (so a
				 * struct buf * can be cast back to this) */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int rf_flags;		/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
221
222
223 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
224 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
225
226 /* XXX Not sure if the following should be replacing the raidPtrs above,
227 or if it should be used in conjunction with that... */
228
/*
 * Per-unit software state for a RAID pseudo-device, paralleling the
 * layout used by ccd(4).  One entry per unit lives in the raid_softc
 * array allocated in raidattach().
 */
struct raid_softc {
	int sc_flags;		/* flags (RAIDF_* below) */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	dev_t sc_dev;		/* our device.. */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf buf_queue;	/* used for the device queue */
};
239 /* sc_flags */
240 #define RAIDF_INITED 0x01 /* unit has been initialized */
241 #define RAIDF_WLABEL 0x02 /* label area is writable */
242 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
243 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
244 #define RAIDF_LOCKED 0x80 /* unit is locked */
245
246 #define raidunit(x) DISKUNIT(x)
247 static int numraid = 0;
248
249 /*
250 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
251 * Be aware that large numbers can allow the driver to consume a lot of
252 * kernel memory, especially on writes, and in degraded mode reads.
253 *
254 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
255 * a single 64K write will typically require 64K for the old data,
256 * 64K for the old parity, and 64K for the new parity, for a total
257 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
259 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
260 *
261 * Now in degraded mode, for example, a 64K read on the above setup may
262 * require data reconstruction, which will require *all* of the 4 remaining
263 * disks to participate -- 4 * 32K/disk == 128K again.
264 */
265
266 #ifndef RAIDOUTSTANDING
267 #define RAIDOUTSTANDING 6
268 #endif
269
270 #define RAIDLABELDEV(dev) \
271 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
272
273 /* declared here, and made public, for the benefit of KVM stuff.. */
274 struct raid_softc *raid_softc;
275
276 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
277 struct disklabel *));
278 static void raidgetdisklabel __P((dev_t));
279 static void raidmakedisklabel __P((struct raid_softc *));
280
281 static int raidlock __P((struct raid_softc *));
282 static void raidunlock __P((struct raid_softc *));
283 int raidlookup __P((char *, struct proc * p, struct vnode **));
284
285 static void rf_markalldirty __P((RF_Raid_t *));
286
287 void rf_ReconThread __P((struct rf_recon_req *));
288 /* XXX what I want is: */
289 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
290 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
291 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
292 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
293
294 void
295 raidattach(num)
296 int num;
297 {
298 int raidID;
299 int i, rc;
300
301 #ifdef DEBUG
302 printf("raidattach: Asked for %d units\n", num);
303 #endif
304
305 if (num <= 0) {
306 #ifdef DIAGNOSTIC
307 panic("raidattach: count <= 0");
308 #endif
309 return;
310 }
311 /* This is where all the initialization stuff gets done. */
312
313 /* Make some space for requested number of units... */
314
315 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
316 if (raidPtrs == NULL) {
317 panic("raidPtrs is NULL!!\n");
318 }
319
320 rc = rf_mutex_init(&rf_sparet_wait_mutex);
321 if (rc) {
322 RF_PANIC();
323 }
324
325 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
326
327 for (i = 0; i < numraid; i++)
328 raidPtrs[i] = NULL;
329 rc = rf_BootRaidframe();
330 if (rc == 0)
331 printf("Kernelized RAIDframe activated\n");
332 else
333 panic("Serious error booting RAID!!\n");
334
335 /* put together some datastructures like the CCD device does.. This
336 * lets us lock the device and what-not when it gets opened. */
337
338 raid_softc = (struct raid_softc *)
339 malloc(num * sizeof(struct raid_softc),
340 M_RAIDFRAME, M_NOWAIT);
341 if (raid_softc == NULL) {
342 printf("WARNING: no memory for RAIDframe driver\n");
343 return;
344 }
345 numraid = num;
346 bzero(raid_softc, num * sizeof(struct raid_softc));
347
348 for (raidID = 0; raidID < num; raidID++) {
349 raid_softc[raidID].buf_queue.b_actf = NULL;
350 raid_softc[raidID].buf_queue.b_actb =
351 &raid_softc[raidID].buf_queue.b_actf;
352 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
353 (RF_Raid_t *));
354 if (raidPtrs[raidID] == NULL) {
355 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
356 numraid = raidID;
357 return;
358 }
359 }
360 }
361
362
363 int
364 raidsize(dev)
365 dev_t dev;
366 {
367 struct raid_softc *rs;
368 struct disklabel *lp;
369 int part, unit, omask, size;
370
371 unit = raidunit(dev);
372 if (unit >= numraid)
373 return (-1);
374 rs = &raid_softc[unit];
375
376 if ((rs->sc_flags & RAIDF_INITED) == 0)
377 return (-1);
378
379 part = DISKPART(dev);
380 omask = rs->sc_dkdev.dk_openmask & (1 << part);
381 lp = rs->sc_dkdev.dk_label;
382
383 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
384 return (-1);
385
386 if (lp->d_partitions[part].p_fstype != FS_SWAP)
387 size = -1;
388 else
389 size = lp->d_partitions[part].p_size *
390 (lp->d_secsize / DEV_BSIZE);
391
392 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
393 return (-1);
394
395 return (size);
396
397 }
398
399 int
400 raiddump(dev, blkno, va, size)
401 dev_t dev;
402 daddr_t blkno;
403 caddr_t va;
404 size_t size;
405 {
406 /* Not implemented. */
407 return ENXIO;
408 }
/* ARGSUSED */
/*
 * raidopen: open a partition of a RAID unit.
 *
 * Validates the unit and (for non-raw partitions) the partition, records
 * the open in the appropriate char/block openmask, and on the very first
 * open of a configured unit marks all components dirty so an unclean
 * shutdown can be detected later.  Returns 0 or an errno.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against concurrent open/close/configure. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
	    unit, part));


	/* First open of a configured unit: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	/* combined mask of all opens, char and block */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * raidclose: close a partition of a RAID unit.
 *
 * Clears the partition's bit from the appropriate openmask; when the
 * last partition of a configured unit closes, updates the component
 * labels (marking the set clean).  Always returns 0 once the unit
 * lock has been obtained.
 */
int
raidclose(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against concurrent open/close/configure. */
	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */
		rf_update_component_labels( raidPtrs[unit] );
	}

	raidunlock(rs);
	return (0);

}
535
536 void
537 raidstrategy(bp)
538 register struct buf *bp;
539 {
540 register int s;
541
542 unsigned int raidID = raidunit(bp->b_dev);
543 RF_Raid_t *raidPtr;
544 struct raid_softc *rs = &raid_softc[raidID];
545 struct disklabel *lp;
546 struct buf *dp;
547 int wlabel;
548
549 if ((rs->sc_flags & RAIDF_INITED) ==0) {
550 bp->b_error = ENXIO;
551 bp->b_flags = B_ERROR;
552 bp->b_resid = bp->b_bcount;
553 biodone(bp);
554 return;
555 }
556 if (raidID >= numraid || !raidPtrs[raidID]) {
557 bp->b_error = ENODEV;
558 bp->b_flags |= B_ERROR;
559 bp->b_resid = bp->b_bcount;
560 biodone(bp);
561 return;
562 }
563 raidPtr = raidPtrs[raidID];
564 if (!raidPtr->valid) {
565 bp->b_error = ENODEV;
566 bp->b_flags |= B_ERROR;
567 bp->b_resid = bp->b_bcount;
568 biodone(bp);
569 return;
570 }
571 if (bp->b_bcount == 0) {
572 db1_printf(("b_bcount is zero..\n"));
573 biodone(bp);
574 return;
575 }
576 lp = rs->sc_dkdev.dk_label;
577
578 /*
579 * Do bounds checking and adjust transfer. If there's an
580 * error, the bounds check will flag that for us.
581 */
582
583 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
584 if (DISKPART(bp->b_dev) != RAW_PART)
585 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
586 db1_printf(("Bounds check failed!!:%d %d\n",
587 (int) bp->b_blkno, (int) wlabel));
588 biodone(bp);
589 return;
590 }
591 s = splbio();
592
593 bp->b_resid = 0;
594
595 /* stuff it onto our queue */
596
597 dp = &rs->buf_queue;
598 bp->b_actf = NULL;
599 bp->b_actb = dp->b_actb;
600 *dp->b_actb = bp;
601 dp->b_actb = &bp->b_actf;
602
603 raidstart(raidPtrs[raidID]);
604
605 splx(s);
606 }
607 /* ARGSUSED */
608 int
609 raidread(dev, uio, flags)
610 dev_t dev;
611 struct uio *uio;
612 int flags;
613 {
614 int unit = raidunit(dev);
615 struct raid_softc *rs;
616 int part;
617
618 if (unit >= numraid)
619 return (ENXIO);
620 rs = &raid_softc[unit];
621
622 if ((rs->sc_flags & RAIDF_INITED) == 0)
623 return (ENXIO);
624 part = DISKPART(dev);
625
626 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
627
628 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
629
630 }
631 /* ARGSUSED */
632 int
633 raidwrite(dev, uio, flags)
634 dev_t dev;
635 struct uio *uio;
636 int flags;
637 {
638 int unit = raidunit(dev);
639 struct raid_softc *rs;
640
641 if (unit >= numraid)
642 return (ENXIO);
643 rs = &raid_softc[unit];
644
645 if ((rs->sc_flags & RAIDF_INITED) == 0)
646 return (ENXIO);
647 db1_printf(("raidwrite\n"));
648 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
649
650 }
651
652 int
653 raidioctl(dev, cmd, data, flag, p)
654 dev_t dev;
655 u_long cmd;
656 caddr_t data;
657 int flag;
658 struct proc *p;
659 {
660 int unit = raidunit(dev);
661 int error = 0;
662 int part, pmask;
663 struct raid_softc *rs;
664 RF_Config_t *k_cfg, *u_cfg;
665 RF_Raid_t *raid;
666 RF_AccTotals_t *totals;
667 RF_DeviceConfig_t *d_cfg, **ucfgp;
668 u_char *specific_buf;
669 int retcode = 0;
670 int row;
671 int column;
672 struct rf_recon_req *rrcopy, *rr;
673 RF_ComponentLabel_t *component_label;
674 RF_ComponentLabel_t ci_label;
675 RF_ComponentLabel_t **c_label_ptr;
676 RF_SingleComponent_t *sparePtr,*componentPtr;
677 RF_SingleComponent_t hot_spare;
678 RF_SingleComponent_t component;
679 int i, j, d;
680
681 if (unit >= numraid)
682 return (ENXIO);
683 rs = &raid_softc[unit];
684
685 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
686 (int) DISKPART(dev), (int) unit, (int) cmd));
687
688 /* Must be open for writes for these commands... */
689 switch (cmd) {
690 case DIOCSDINFO:
691 case DIOCWDINFO:
692 case DIOCWLABEL:
693 if ((flag & FWRITE) == 0)
694 return (EBADF);
695 }
696
697 /* Must be initialized for these... */
698 switch (cmd) {
699 case DIOCGDINFO:
700 case DIOCSDINFO:
701 case DIOCWDINFO:
702 case DIOCGPART:
703 case DIOCWLABEL:
704 case DIOCGDEFLABEL:
705 case RAIDFRAME_SHUTDOWN:
706 case RAIDFRAME_REWRITEPARITY:
707 case RAIDFRAME_GET_INFO:
708 case RAIDFRAME_RESET_ACCTOTALS:
709 case RAIDFRAME_GET_ACCTOTALS:
710 case RAIDFRAME_KEEP_ACCTOTALS:
711 case RAIDFRAME_GET_SIZE:
712 case RAIDFRAME_FAIL_DISK:
713 case RAIDFRAME_COPYBACK:
714 case RAIDFRAME_CHECK_RECON_STATUS:
715 case RAIDFRAME_GET_COMPONENT_LABEL:
716 case RAIDFRAME_SET_COMPONENT_LABEL:
717 case RAIDFRAME_ADD_HOT_SPARE:
718 case RAIDFRAME_REMOVE_HOT_SPARE:
719 case RAIDFRAME_INIT_LABELS:
720 case RAIDFRAME_REBUILD_IN_PLACE:
721 case RAIDFRAME_CHECK_PARITY:
722 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
723 case RAIDFRAME_CHECK_COPYBACK_STATUS:
724 if ((rs->sc_flags & RAIDF_INITED) == 0)
725 return (ENXIO);
726 }
727
728 switch (cmd) {
729
730
731 /* configure the system */
732 case RAIDFRAME_CONFIGURE:
733
734 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
735 /* copy-in the configuration information */
736 /* data points to a pointer to the configuration structure */
737 u_cfg = *((RF_Config_t **) data);
738 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
739 if (k_cfg == NULL) {
740 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
741 return (ENOMEM);
742 }
743 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
744 sizeof(RF_Config_t));
745 if (retcode) {
746 RF_Free(k_cfg, sizeof(RF_Config_t));
747 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
748 retcode));
749 return (retcode);
750 }
751 /* allocate a buffer for the layout-specific data, and copy it
752 * in */
753 if (k_cfg->layoutSpecificSize) {
754 if (k_cfg->layoutSpecificSize > 10000) {
755 /* sanity check */
756 RF_Free(k_cfg, sizeof(RF_Config_t));
757 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
758 return (EINVAL);
759 }
760 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
761 (u_char *));
762 if (specific_buf == NULL) {
763 RF_Free(k_cfg, sizeof(RF_Config_t));
764 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
765 return (ENOMEM);
766 }
767 retcode = copyin(k_cfg->layoutSpecific,
768 (caddr_t) specific_buf,
769 k_cfg->layoutSpecificSize);
770 if (retcode) {
771 RF_Free(k_cfg, sizeof(RF_Config_t));
772 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
773 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
774 retcode));
775 return (retcode);
776 }
777 } else
778 specific_buf = NULL;
779 k_cfg->layoutSpecific = specific_buf;
780
781 /* should do some kind of sanity check on the configuration.
782 * Store the sum of all the bytes in the last byte? */
783
784 /* configure the system */
785
786 raidPtrs[unit]->raidid = unit;
787
788 retcode = rf_Configure(raidPtrs[unit], k_cfg);
789
790 if (retcode == 0) {
791
792 /* allow this many simultaneous IO's to
793 this RAID device */
794 raidPtrs[unit]->openings = RAIDOUTSTANDING;
795
796 /* XXX should be moved to rf_Configure() */
797
798 raidPtrs[unit]->copyback_in_progress = 0;
799 raidPtrs[unit]->parity_rewrite_in_progress = 0;
800 raidPtrs[unit]->recon_in_progress = 0;
801
802 retcode = raidinit(dev, raidPtrs[unit], unit);
803 rf_markalldirty( raidPtrs[unit] );
804 }
805 /* free the buffers. No return code here. */
806 if (k_cfg->layoutSpecificSize) {
807 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
808 }
809 RF_Free(k_cfg, sizeof(RF_Config_t));
810
811 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
812 retcode));
813
814 return (retcode);
815
816 /* shutdown the system */
817 case RAIDFRAME_SHUTDOWN:
818
819 if ((error = raidlock(rs)) != 0)
820 return (error);
821
822 /*
823 * If somebody has a partition mounted, we shouldn't
824 * shutdown.
825 */
826
827 part = DISKPART(dev);
828 pmask = (1 << part);
829 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
830 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
831 (rs->sc_dkdev.dk_copenmask & pmask))) {
832 raidunlock(rs);
833 return (EBUSY);
834 }
835
836 retcode = rf_Shutdown(raidPtrs[unit]);
837
838 pool_destroy(&rs->sc_cbufpool);
839
840 /* It's no longer initialized... */
841 rs->sc_flags &= ~RAIDF_INITED;
842
843 /* Detach the disk. */
844 disk_detach(&rs->sc_dkdev);
845
846 raidunlock(rs);
847
848 return (retcode);
849 case RAIDFRAME_GET_COMPONENT_LABEL:
850 c_label_ptr = (RF_ComponentLabel_t **) data;
851 /* need to read the component label for the disk indicated
852 by row,column in component_label
853 XXX need to sanity check these values!!!
854 */
855
856 /* For practice, let's get it directly fromdisk, rather
857 than from the in-core copy */
858 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
859 (RF_ComponentLabel_t *));
860 if (component_label == NULL)
861 return (ENOMEM);
862
863 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
864
865 retcode = copyin( *c_label_ptr, component_label,
866 sizeof(RF_ComponentLabel_t));
867
868 if (retcode) {
869 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
870 return(retcode);
871 }
872
873 row = component_label->row;
874 column = component_label->column;
875
876 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
877 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
878 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
879 return(EINVAL);
880 }
881
882 raidread_component_label(
883 raidPtrs[unit]->Disks[row][column].dev,
884 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
885 component_label );
886
887 retcode = copyout((caddr_t) component_label,
888 (caddr_t) *c_label_ptr,
889 sizeof(RF_ComponentLabel_t));
890 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
891 return (retcode);
892
893 case RAIDFRAME_SET_COMPONENT_LABEL:
894 component_label = (RF_ComponentLabel_t *) data;
895
896 /* XXX check the label for valid stuff... */
897 /* Note that some things *should not* get modified --
898 the user should be re-initing the labels instead of
899 trying to patch things.
900 */
901
902 printf("Got component label:\n");
903 printf("Version: %d\n",component_label->version);
904 printf("Serial Number: %d\n",component_label->serial_number);
905 printf("Mod counter: %d\n",component_label->mod_counter);
906 printf("Row: %d\n", component_label->row);
907 printf("Column: %d\n", component_label->column);
908 printf("Num Rows: %d\n", component_label->num_rows);
909 printf("Num Columns: %d\n", component_label->num_columns);
910 printf("Clean: %d\n", component_label->clean);
911 printf("Status: %d\n", component_label->status);
912
913 row = component_label->row;
914 column = component_label->column;
915
916 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
917 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
918 return(EINVAL);
919 }
920
921 /* XXX this isn't allowed to do anything for now :-) */
922 #if 0
923 raidwrite_component_label(
924 raidPtrs[unit]->Disks[row][column].dev,
925 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
926 component_label );
927 #endif
928 return (0);
929
930 case RAIDFRAME_INIT_LABELS:
931 component_label = (RF_ComponentLabel_t *) data;
932 /*
933 we only want the serial number from
934 the above. We get all the rest of the information
935 from the config that was used to create this RAID
936 set.
937 */
938
939 raidPtrs[unit]->serial_number = component_label->serial_number;
940 /* current version number */
941 ci_label.version = RF_COMPONENT_LABEL_VERSION;
942 ci_label.serial_number = component_label->serial_number;
943 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
944 ci_label.num_rows = raidPtrs[unit]->numRow;
945 ci_label.num_columns = raidPtrs[unit]->numCol;
946 ci_label.clean = RF_RAID_DIRTY; /* not clean */
947 ci_label.status = rf_ds_optimal; /* "It's good!" */
948
949 for(row=0;row<raidPtrs[unit]->numRow;row++) {
950 ci_label.row = row;
951 for(column=0;column<raidPtrs[unit]->numCol;column++) {
952 ci_label.column = column;
953 raidwrite_component_label(
954 raidPtrs[unit]->Disks[row][column].dev,
955 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
956 &ci_label );
957 }
958 }
959
960 return (retcode);
961
962 /* initialize all parity */
963 case RAIDFRAME_REWRITEPARITY:
964
965 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
966 /* Parity for RAID 0 is trivially correct */
967 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
968 return(0);
969 }
970
971 if (raidPtrs[unit]->parity_rewrite_in_progress == 1) {
972 /* Re-write is already in progress! */
973 return(EINVAL);
974 }
975
976 /* borrow the thread of the requesting process */
977
978 retcode = RF_CREATE_THREAD(raidPtrs[unit]->parity_rewrite_thread,
979 rf_RewriteParityThread,
980 raidPtrs[unit],"raid_parity");
981 return (retcode);
982
983
984 case RAIDFRAME_ADD_HOT_SPARE:
985 sparePtr = (RF_SingleComponent_t *) data;
986 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
987 printf("Adding spare\n");
988 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
989 return(retcode);
990
991 case RAIDFRAME_REMOVE_HOT_SPARE:
992 return(retcode);
993
994 case RAIDFRAME_REBUILD_IN_PLACE:
995
996 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
997 /* Can't do this on a RAID 0!! */
998 return(EINVAL);
999 }
1000
1001 if (raidPtrs[unit]->recon_in_progress == 1) {
1002 /* a reconstruct is already in progress! */
1003 return(EINVAL);
1004 }
1005
1006 componentPtr = (RF_SingleComponent_t *) data;
1007 memcpy( &component, componentPtr,
1008 sizeof(RF_SingleComponent_t));
1009 row = component.row;
1010 column = component.column;
1011 printf("Rebuild: %d %d\n",row, column);
1012 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
1013 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
1014 return(EINVAL);
1015 }
1016
1017 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1018 if (rrcopy == NULL)
1019 return(ENOMEM);
1020
1021 rrcopy->raidPtr = (void *) raidPtrs[unit];
1022 rrcopy->row = row;
1023 rrcopy->col = column;
1024
1025 retcode = RF_CREATE_THREAD(raidPtrs[unit]->recon_thread,
1026 rf_ReconstructInPlaceThread,
1027 rrcopy,"raid_reconip");
1028 return(retcode);
1029
1030 case RAIDFRAME_GET_INFO:
1031 raid = raidPtrs[unit];
1032
1033 if (!raid->valid)
1034 return (ENODEV);
1035 ucfgp = (RF_DeviceConfig_t **) data;
1036 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1037 (RF_DeviceConfig_t *));
1038 if (d_cfg == NULL)
1039 return (ENOMEM);
1040 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1041 d_cfg->rows = raid->numRow;
1042 d_cfg->cols = raid->numCol;
1043 d_cfg->ndevs = raid->numRow * raid->numCol;
1044 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1045 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1046 return (ENOMEM);
1047 }
1048 d_cfg->nspares = raid->numSpare;
1049 if (d_cfg->nspares >= RF_MAX_DISKS) {
1050 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1051 return (ENOMEM);
1052 }
1053 d_cfg->maxqdepth = raid->maxQueueDepth;
1054 d = 0;
1055 for (i = 0; i < d_cfg->rows; i++) {
1056 for (j = 0; j < d_cfg->cols; j++) {
1057 d_cfg->devs[d] = raid->Disks[i][j];
1058 d++;
1059 }
1060 }
1061 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1062 d_cfg->spares[i] = raid->Disks[0][j];
1063 }
1064 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1065 sizeof(RF_DeviceConfig_t));
1066 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1067
1068 return (retcode);
1069
1070 case RAIDFRAME_CHECK_PARITY:
1071 *(int *) data = raidPtrs[unit]->parity_good;
1072 return (0);
1073
1074 case RAIDFRAME_RESET_ACCTOTALS:
1075 raid = raidPtrs[unit];
1076 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1077 return (0);
1078
1079 case RAIDFRAME_GET_ACCTOTALS:
1080 totals = (RF_AccTotals_t *) data;
1081 raid = raidPtrs[unit];
1082 *totals = raid->acc_totals;
1083 return (0);
1084
1085 case RAIDFRAME_KEEP_ACCTOTALS:
1086 raidPtrs[unit]->keep_acc_totals = *(int *)data;
1087 return (0);
1088
1089 case RAIDFRAME_GET_SIZE:
1090 *(int *) data = raidPtrs[unit]->totalSectors;
1091 return (0);
1092
1093 /* fail a disk & optionally start reconstruction */
1094 case RAIDFRAME_FAIL_DISK:
1095
1096 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1097 /* Can't do this on a RAID 0!! */
1098 return(EINVAL);
1099 }
1100
1101 rr = (struct rf_recon_req *) data;
1102
1103 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1104 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1105 return (EINVAL);
1106
1107 printf("raid%d: Failing the disk: row: %d col: %d\n",
1108 unit, rr->row, rr->col);
1109
1110 /* make a copy of the recon request so that we don't rely on
1111 * the user's buffer */
1112 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1113 if (rrcopy == NULL)
1114 return(ENOMEM);
1115 bcopy(rr, rrcopy, sizeof(*rr));
1116 rrcopy->raidPtr = (void *) raidPtrs[unit];
1117
1118 retcode = RF_CREATE_THREAD(raidPtrs[unit]->recon_thread,
1119 rf_ReconThread,
1120 rrcopy,"raid_recon");
1121 return (0);
1122
1123 /* invoke a copyback operation after recon on whatever disk
1124 * needs it, if any */
1125 case RAIDFRAME_COPYBACK:
1126
1127 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1128 /* This makes no sense on a RAID 0!! */
1129 return(EINVAL);
1130 }
1131
1132 if (raidPtrs[unit]->copyback_in_progress == 1) {
1133 /* Copyback is already in progress! */
1134 return(EINVAL);
1135 }
1136
1137 retcode = RF_CREATE_THREAD(raidPtrs[unit]->copyback_thread,
1138 rf_CopybackThread,
1139 raidPtrs[unit],"raid_copyback");
1140 return (retcode);
1141
1142 /* return the percentage completion of reconstruction */
1143 case RAIDFRAME_CHECK_RECON_STATUS:
1144 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1145 /* This makes no sense on a RAID 0 */
1146 return(EINVAL);
1147 }
1148 row = 0; /* XXX we only consider a single row... */
1149 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1150 *(int *) data = 100;
1151 else
1152 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1153 return (0);
1154
1155 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1156 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1157 /* This makes no sense on a RAID 0 */
1158 return(EINVAL);
1159 }
1160 if (raidPtrs[unit]->parity_rewrite_in_progress == 1) {
1161 *(int *) data = 100 * raidPtrs[unit]->parity_rewrite_stripes_done / raidPtrs[unit]->Layout.numStripe;
1162 } else {
1163 *(int *) data = 100;
1164 }
1165 return (0);
1166
1167 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1168 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1169 /* This makes no sense on a RAID 0 */
1170 return(EINVAL);
1171 }
1172 if (raidPtrs[unit]->copyback_in_progress == 1) {
1173 *(int *) data = 100 * raidPtrs[unit]->copyback_stripes_done / raidPtrs[unit]->Layout.numStripe;
1174 } else {
1175 *(int *) data = 100;
1176 }
1177 return (0);
1178
1179
1180 /* the sparetable daemon calls this to wait for the kernel to
1181 * need a spare table. this ioctl does not return until a
1182 * spare table is needed. XXX -- calling mpsleep here in the
1183 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1184 * -- I should either compute the spare table in the kernel,
1185 * or have a different -- XXX XXX -- interface (a different
1186 * character device) for delivering the table -- XXX */
1187 #if 0
1188 case RAIDFRAME_SPARET_WAIT:
1189 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1190 while (!rf_sparet_wait_queue)
1191 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1192 waitreq = rf_sparet_wait_queue;
1193 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1194 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1195
1196 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1197
1198 RF_Free(waitreq, sizeof(*waitreq));
1199 return (0);
1200
1201
1202 /* wakes up a process waiting on SPARET_WAIT and puts an error
1203 * code in it that will cause the dameon to exit */
1204 case RAIDFRAME_ABORT_SPARET_WAIT:
1205 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1206 waitreq->fcol = -1;
1207 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1208 waitreq->next = rf_sparet_wait_queue;
1209 rf_sparet_wait_queue = waitreq;
1210 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1211 wakeup(&rf_sparet_wait_queue);
1212 return (0);
1213
1214 /* used by the spare table daemon to deliver a spare table
1215 * into the kernel */
1216 case RAIDFRAME_SEND_SPARET:
1217
1218 /* install the spare table */
1219 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1220
1221 /* respond to the requestor. the return status of the spare
1222 * table installation is passed in the "fcol" field */
1223 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1224 waitreq->fcol = retcode;
1225 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1226 waitreq->next = rf_sparet_resp_queue;
1227 rf_sparet_resp_queue = waitreq;
1228 wakeup(&rf_sparet_resp_queue);
1229 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1230
1231 return (retcode);
1232 #endif
1233
1234 default:
1235 break; /* fall through to the os-specific code below */
1236
1237 }
1238
1239 if (!raidPtrs[unit]->valid)
1240 return (EINVAL);
1241
1242 /*
1243 * Add support for "regular" device ioctls here.
1244 */
1245
1246 switch (cmd) {
1247 case DIOCGDINFO:
1248 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1249 break;
1250
1251 case DIOCGPART:
1252 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1253 ((struct partinfo *) data)->part =
1254 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1255 break;
1256
1257 case DIOCWDINFO:
1258 case DIOCSDINFO:
1259 if ((error = raidlock(rs)) != 0)
1260 return (error);
1261
1262 rs->sc_flags |= RAIDF_LABELLING;
1263
1264 error = setdisklabel(rs->sc_dkdev.dk_label,
1265 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1266 if (error == 0) {
1267 if (cmd == DIOCWDINFO)
1268 error = writedisklabel(RAIDLABELDEV(dev),
1269 raidstrategy, rs->sc_dkdev.dk_label,
1270 rs->sc_dkdev.dk_cpulabel);
1271 }
1272 rs->sc_flags &= ~RAIDF_LABELLING;
1273
1274 raidunlock(rs);
1275
1276 if (error)
1277 return (error);
1278 break;
1279
1280 case DIOCWLABEL:
1281 if (*(int *) data != 0)
1282 rs->sc_flags |= RAIDF_WLABEL;
1283 else
1284 rs->sc_flags &= ~RAIDF_WLABEL;
1285 break;
1286
1287 case DIOCGDEFLABEL:
1288 raidgetdefaultlabel(raidPtrs[unit], rs,
1289 (struct disklabel *) data);
1290 break;
1291
1292 default:
1293 retcode = ENOTTY;
1294 }
1295 return (retcode);
1296
1297 }
1298
1299
1300 /* raidinit -- complete the rest of the initialization for the
1301 RAIDframe device. */
1302
1303
1304 static int
1305 raidinit(dev, raidPtr, unit)
1306 dev_t dev;
1307 RF_Raid_t *raidPtr;
1308 int unit;
1309 {
1310 int retcode;
1311 struct raid_softc *rs;
1312
1313 retcode = 0;
1314
1315 rs = &raid_softc[unit];
1316 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1317 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1318
1319
1320 /* XXX should check return code first... */
1321 rs->sc_flags |= RAIDF_INITED;
1322
1323 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1324
1325 rs->sc_dkdev.dk_name = rs->sc_xname;
1326
1327 /* disk_attach actually creates space for the CPU disklabel, among
1328 * other things, so it's critical to call this *BEFORE* we try putzing
1329 * with disklabels. */
1330
1331 disk_attach(&rs->sc_dkdev);
1332
1333 /* XXX There may be a weird interaction here between this, and
1334 * protectedSectors, as used in RAIDframe. */
1335
1336 rs->sc_size = raidPtr->totalSectors;
1337 rs->sc_dev = dev;
1338
1339 return (retcode);
1340 }
1341
1342 /* wake up the daemon & tell it to get us a spare table
1343 * XXX
1344 * the entries in the queues should be tagged with the raidPtr
1345 * so that in the extremely rare case that two recons happen at once,
1346 * we know for which device were requesting a spare table
1347 * XXX
1348 *
1349 * XXX This code is not currently used. GO
1350 */
/*
 * Queue `req' for the user-land sparetable daemon and sleep until a
 * response shows up on rf_sparet_resp_queue.  Returns the status the
 * daemon placed in the response's fcol field; the response structure
 * itself is freed here.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int     retcode;

	/* enqueue the request and wake any daemon blocked in SPARET_WAIT */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): unlike the old Mach mpsleep(), tsleep() does NOT
	 * drop rf_sparet_wait_mutex while sleeping -- verify the responder
	 * cannot deadlock against this.  (Per the comment above this
	 * function, this code path is currently unused.) */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* dequeue the response; `req' is reused and now points at the
	 * daemon's reply, not the request we were handed */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1376
1377 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1378 * bp & passes it down.
1379 * any calls originating in the kernel must use non-blocking I/O
1380 * do some extra sanity checking to return "appropriate" error values for
1381 * certain conditions (to make some standard utilities work)
1382 *
1383 * Formerly known as: rf_DoAccessKernel
1384 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int     retcode;
	struct partition *pp;
	daddr_t blocknum;
	int     unit;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;
	struct buf *dp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Check to see if we're at the limit... */
	RF_LOCK_MUTEX(raidPtr->mutex);
	/* drain the softc's buffer queue while the array still has
	 * openings; the mutex is dropped while each request is processed */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		dp = &rs->buf_queue;
		bp = dp->b_actf;
		if (bp == NULL) {
			/* nothing more to do */
			return;
		}

		/* update structures */
		/* unlink bp from the b_actf/b_actb doubly-linked queue */
		dp = bp->b_actf;
		if (dp != NULL) {
			dp->b_actb = bp->b_actb;
		} else {
			/* bp was the tail; fix the queue head's back link */
			rs->buf_queue.b_actb = bp->b_actb;
		}
		*bp->b_actb = dp;

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* pb is 1 when the byte count ends in a partial sector */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/* reject I/O that runs past the end of the array; the
		 * (sum < ...) comparisons also catch arithmetic wraparound */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* requests must be a whole number of sectors */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* claim an opening for this request */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* NOTE(review): retcode from rf_DoAccess() is never
		 * examined -- verify failures are reported through the
		 * completion callback instead. */
		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
		    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
		    do_async, raid_addr, num_blocks,
		    bp->b_un.b_addr, bp, NULL, NULL,
		    RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);


		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1505
1506
1507
1508
1509 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1510
/*
 * Dispatch one RAIDframe disk-queue request (`req') on `queue' to the
 * underlying component device.  Reads and writes are issued through
 * VOP_STRATEGY() on a raidbuf drawn from the unit's pool; completion is
 * delivered via KernelWakeupFunc().  NOP requests just bump the
 * outstanding count and invoke the completion callback directly.
 * Returns 0.
 */
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int     unit;
	int s;

	s=0;
	/* s = splbio();*/ /* want to test this */
	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	/* remember which queue this request came from, for the callback */
	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!!  Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	/* clear any stale error state on the incoming buffer */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* invoke the completion path directly -- no real I/O */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		/* start timing how long this request waits on the disk */
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* set up the raidbuf for the component I/O; KernelWakeupFunc
		 * will fire from biodone() when it finishes */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must account for themselves on the vnode */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	/* splx(s); */ /* want to test this */
	return (0);
}
1625 /* this is the callback function associated with a I/O invoked from
1626 kernel code.
1627 */
/*
 * Completion callback (b_iodone) for component I/O started by
 * rf_DispatchKernelIO().  Propagates error/resid state back to the
 * original buf, records timing statistics, marks the component failed
 * on the first error, releases the raidbuf, and hands completion back
 * to RAIDframe via rf_DiskIOComplete()/CompleteFunc.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int     unit;
	register int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate any component-level error to the original buffer */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	if (req->tracerec) {
		/* fold this request's elapsed time into the access trace */
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	/* NOTE(review): disk_unbusy() only runs when b_resid == 0, yet
	 * disk_busy() was called unconditionally in rf_DispatchKernelIO()
	 * -- verify the busy count cannot leak on short/errored I/O. */
	if (bp->b_resid == 0) {
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	}

	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);

	splx(s);
}
1708
1709
1710
1711 /*
1712 * initialize a buf structure for doing an I/O in the kernel.
1713 */
1714 static void
1715 InitBP(
1716 struct buf * bp,
1717 struct vnode * b_vp,
1718 unsigned rw_flag,
1719 dev_t dev,
1720 RF_SectorNum_t startSect,
1721 RF_SectorCount_t numSect,
1722 caddr_t buf,
1723 void (*cbFunc) (struct buf *),
1724 void *cbArg,
1725 int logBytesPerSector,
1726 struct proc * b_proc)
1727 {
1728 /* bp->b_flags = B_PHYS | rw_flag; */
1729 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1730 bp->b_bcount = numSect << logBytesPerSector;
1731 bp->b_bufsize = bp->b_bcount;
1732 bp->b_error = 0;
1733 bp->b_dev = dev;
1734 bp->b_un.b_addr = buf;
1735 bp->b_blkno = startSect;
1736 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1737 if (bp->b_bcount == 0) {
1738 panic("bp->b_bcount is zero in InitBP!!\n");
1739 }
1740 bp->b_proc = b_proc;
1741 bp->b_iodone = cbFunc;
1742 bp->b_vp = b_vp;
1743
1744 }
1745
1746 static void
1747 raidgetdefaultlabel(raidPtr, rs, lp)
1748 RF_Raid_t *raidPtr;
1749 struct raid_softc *rs;
1750 struct disklabel *lp;
1751 {
1752 db1_printf(("Building a default label...\n"));
1753 bzero(lp, sizeof(*lp));
1754
1755 /* fabricate a label... */
1756 lp->d_secperunit = raidPtr->totalSectors;
1757 lp->d_secsize = raidPtr->bytesPerSector;
1758 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1759 lp->d_ntracks = 1;
1760 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1761 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1762
1763 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1764 lp->d_type = DTYPE_RAID;
1765 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1766 lp->d_rpm = 3600;
1767 lp->d_interleave = 1;
1768 lp->d_flags = 0;
1769
1770 lp->d_partitions[RAW_PART].p_offset = 0;
1771 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1772 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1773 lp->d_npartitions = RAW_PART + 1;
1774
1775 lp->d_magic = DISKMAGIC;
1776 lp->d_magic2 = DISKMAGIC;
1777 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1778
1779 }
1780 /*
1781 * Read the disklabel from the raid device. If one is not present, fake one
1782 * up.
1783 */
static void
raidgetdisklabel(dev)
	dev_t dev;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char   *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a fabricated default label, then try to read a real
	 * one off the device */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* no on-disk label found; synthesize one */
		raidmakedisklabel(rs);
	else {
		int     i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same componets are used, and old disklabel may used
		 * if that is found.
		 */
		/* warn (but do not reject) if the label disagrees with the
		 * actual size of the array */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
1837 /*
1838 * Take care of things one might want to take care of in the event
1839 * that a disklabel isn't present.
1840 */
1841 static void
1842 raidmakedisklabel(rs)
1843 struct raid_softc *rs;
1844 {
1845 struct disklabel *lp = rs->sc_dkdev.dk_label;
1846 db1_printf(("Making a label..\n"));
1847
1848 /*
1849 * For historical reasons, if there's no disklabel present
1850 * the raw partition must be marked FS_BSDFFS.
1851 */
1852
1853 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1854
1855 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1856
1857 lp->d_checksum = dkcksum(lp);
1858 }
1859 /*
1860 * Lookup the provided name in the filesystem. If the file exists,
1861 * is a valid block device, and isn't being used by anyone else,
1862 * set *vpp to the file's vnode.
1863 * You'll find the original of this in ccd.c
1864 */
1865 int
1866 raidlookup(path, p, vpp)
1867 char *path;
1868 struct proc *p;
1869 struct vnode **vpp; /* result */
1870 {
1871 struct nameidata nd;
1872 struct vnode *vp;
1873 struct vattr va;
1874 int error;
1875
1876 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1877 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1878 #ifdef DEBUG
1879 printf("RAIDframe: vn_open returned %d\n", error);
1880 #endif
1881 return (error);
1882 }
1883 vp = nd.ni_vp;
1884 if (vp->v_usecount > 1) {
1885 VOP_UNLOCK(vp, 0);
1886 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1887 return (EBUSY);
1888 }
1889 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1890 VOP_UNLOCK(vp, 0);
1891 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1892 return (error);
1893 }
1894 /* XXX: eventually we should handle VREG, too. */
1895 if (va.va_type != VBLK) {
1896 VOP_UNLOCK(vp, 0);
1897 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1898 return (ENOTBLK);
1899 }
1900 VOP_UNLOCK(vp, 0);
1901 *vpp = vp;
1902 return (0);
1903 }
1904 /*
1905 * Wait interruptibly for an exclusive lock.
1906 *
1907 * XXX
1908 * Several drivers do this; it should be abstracted and made MP-safe.
1909 * (Hmm... where have we seen this warning before :-> GO )
1910 */
1911 static int
1912 raidlock(rs)
1913 struct raid_softc *rs;
1914 {
1915 int error;
1916
1917 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1918 rs->sc_flags |= RAIDF_WANTED;
1919 if ((error =
1920 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1921 return (error);
1922 }
1923 rs->sc_flags |= RAIDF_LOCKED;
1924 return (0);
1925 }
1926 /*
1927 * Unlock and wake up any waiters.
1928 */
1929 static void
1930 raidunlock(rs)
1931 struct raid_softc *rs;
1932 {
1933
1934 rs->sc_flags &= ~RAIDF_LOCKED;
1935 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
1936 rs->sc_flags &= ~RAIDF_WANTED;
1937 wakeup(rs);
1938 }
1939 }
1940
1941
1942 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
1943 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
1944
1945 int
1946 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
1947 {
1948 RF_ComponentLabel_t component_label;
1949 raidread_component_label(dev, b_vp, &component_label);
1950 component_label.mod_counter = mod_counter;
1951 component_label.clean = RF_RAID_CLEAN;
1952 raidwrite_component_label(dev, b_vp, &component_label);
1953 return(0);
1954 }
1955
1956
1957 int
1958 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
1959 {
1960 RF_ComponentLabel_t component_label;
1961 raidread_component_label(dev, b_vp, &component_label);
1962 component_label.mod_counter = mod_counter;
1963 component_label.clean = RF_RAID_DIRTY;
1964 raidwrite_component_label(dev, b_vp, &component_label);
1965 return(0);
1966 }
1967
1968 /* ARGSUSED */
/*
 * Read the RAIDframe component label from `dev' into *component_label.
 * The label lives RF_COMPONENT_INFO_OFFSET bytes into the component.
 * Returns 0 on success or the error from biowait().  `b_vp' is unused
 * here (ARGSUSED).
 */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_resid is set in units of blocks here while
	 * b_bcount is in bytes -- confirm this is what the strategy
	 * routine expects. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* issue the read directly through the block device and wait */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* toss the buffer rather than caching stale label data */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
2018 /* ARGSUSED */
/*
 * Write *component_label to the component-label area of `dev'
 * (RF_COMPONENT_INFO_OFFSET bytes into the component).  The remainder
 * of the label block is zeroed.  Returns 0 on success or the error
 * from biowait().  `b_vp' is unused here (ARGSUSED).
 */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	/* NOTE(review): b_resid is in blocks while b_bcount is in bytes
	 * -- confirm against the strategy routine's expectations. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero-fill, then place the label at the start of the block */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* issue the write directly through the block device and wait */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2052
/*
 * Bump the array's mod_counter and mark the component label of every
 * non-failed component dirty (RF_RAID_DIRTY).  Components whose label
 * says they are spared are skipped entirely.  The spare-handling code
 * below is disabled (#if 0).
 */
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				/* consult the on-component label first so we
				 * never touch a spared component */
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
#if 0
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
2147
2148
2149 void
2150 rf_update_component_labels( raidPtr )
2151 RF_Raid_t *raidPtr;
2152 {
2153 RF_ComponentLabel_t c_label;
2154 int sparecol;
2155 int r,c;
2156 int i,j;
2157 int srow, scol;
2158
2159 srow = -1;
2160 scol = -1;
2161
2162 /* XXX should do extra checks to make sure things really are clean,
2163 rather than blindly setting the clean bit... */
2164
2165 raidPtr->mod_counter++;
2166
2167 for (r = 0; r < raidPtr->numRow; r++) {
2168 for (c = 0; c < raidPtr->numCol; c++) {
2169 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2170 raidread_component_label(
2171 raidPtr->Disks[r][c].dev,
2172 raidPtr->raid_cinfo[r][c].ci_vp,
2173 &c_label);
2174 /* make sure status is noted */
2175 c_label.status = rf_ds_optimal;
2176 raidwrite_component_label(
2177 raidPtr->Disks[r][c].dev,
2178 raidPtr->raid_cinfo[r][c].ci_vp,
2179 &c_label);
2180 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2181 raidmarkclean(
2182 raidPtr->Disks[r][c].dev,
2183 raidPtr->raid_cinfo[r][c].ci_vp,
2184 raidPtr->mod_counter);
2185 }
2186 }
2187 /* else we don't touch it.. */
2188 #if 0
2189 else if (raidPtr->Disks[r][c].status !=
2190 rf_ds_failed) {
2191 raidread_component_label(
2192 raidPtr->Disks[r][c].dev,
2193 raidPtr->raid_cinfo[r][c].ci_vp,
2194 &c_label);
2195 /* make sure status is noted */
2196 c_label.status =
2197 raidPtr->Disks[r][c].status;
2198 raidwrite_component_label(
2199 raidPtr->Disks[r][c].dev,
2200 raidPtr->raid_cinfo[r][c].ci_vp,
2201 &c_label);
2202 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2203 raidmarkclean(
2204 raidPtr->Disks[r][c].dev,
2205 raidPtr->raid_cinfo[r][c].ci_vp,
2206 raidPtr->mod_counter);
2207 }
2208 }
2209 #endif
2210 }
2211 }
2212
2213 for( c = 0; c < raidPtr->numSpare ; c++) {
2214 sparecol = raidPtr->numCol + c;
2215 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2216 /*
2217
2218 we claim this disk is "optimal" if it's
2219 rf_ds_used_spare, as that means it should be
2220 directly substitutable for the disk it replaced.
2221 We note that too...
2222
2223 */
2224
2225 for(i=0;i<raidPtr->numRow;i++) {
2226 for(j=0;j<raidPtr->numCol;j++) {
2227 if ((raidPtr->Disks[i][j].spareRow ==
2228 0) &&
2229 (raidPtr->Disks[i][j].spareCol ==
2230 sparecol)) {
2231 srow = i;
2232 scol = j;
2233 break;
2234 }
2235 }
2236 }
2237
2238 raidread_component_label(
2239 raidPtr->Disks[0][sparecol].dev,
2240 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2241 &c_label);
2242 /* make sure status is noted */
2243 c_label.version = RF_COMPONENT_LABEL_VERSION;
2244 c_label.mod_counter = raidPtr->mod_counter;
2245 c_label.serial_number = raidPtr->serial_number;
2246 c_label.row = srow;
2247 c_label.column = scol;
2248 c_label.num_rows = raidPtr->numRow;
2249 c_label.num_columns = raidPtr->numCol;
2250 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2251 c_label.status = rf_ds_optimal;
2252 raidwrite_component_label(
2253 raidPtr->Disks[0][sparecol].dev,
2254 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2255 &c_label);
2256 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2257 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2258 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2259 raidPtr->mod_counter);
2260 }
2261 }
2262 }
2263 /* printf("Component labels updated\n"); */
2264 }
2265
2266 void
2267 rf_ReconThread(req)
2268 struct rf_recon_req *req;
2269 {
2270 int s;
2271 RF_Raid_t *raidPtr;
2272
2273 s = splbio();
2274 raidPtr = (RF_Raid_t *) req->raidPtr;
2275 raidPtr->recon_in_progress = 1;
2276
2277 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2278 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2279
2280 /* XXX get rid of this! we don't need it at all.. */
2281 RF_Free(req, sizeof(*req));
2282
2283 raidPtr->recon_in_progress = 0;
2284 splx(s);
2285
2286 /* That's all... */
2287 kthread_exit(0); /* does not return */
2288 }
2289
2290 void
2291 rf_RewriteParityThread(raidPtr)
2292 RF_Raid_t *raidPtr;
2293 {
2294 int retcode;
2295 int s;
2296
2297 raidPtr->parity_rewrite_in_progress = 1;
2298 s = splbio();
2299 retcode = rf_RewriteParity(raidPtr);
2300 splx(s);
2301 if (retcode) {
2302 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2303 } else {
2304 /* set the clean bit! If we shutdown correctly,
2305 the clean bit on each component label will get
2306 set */
2307 raidPtr->parity_good = RF_RAID_CLEAN;
2308 }
2309 raidPtr->parity_rewrite_in_progress = 0;
2310
2311 /* That's all... */
2312 kthread_exit(0); /* does not return */
2313 }
2314
2315
2316 void
2317 rf_CopybackThread(raidPtr)
2318 RF_Raid_t *raidPtr;
2319 {
2320 int s;
2321
2322 raidPtr->copyback_in_progress = 1;
2323 s = splbio();
2324 rf_CopybackReconstructedData(raidPtr);
2325 splx(s);
2326 raidPtr->copyback_in_progress = 0;
2327
2328 /* That's all... */
2329 kthread_exit(0); /* does not return */
2330 }
2331
2332
2333 void
2334 rf_ReconstructInPlaceThread(req)
2335 struct rf_recon_req *req;
2336 {
2337 int retcode;
2338 int s;
2339 RF_Raid_t *raidPtr;
2340
2341 s = splbio();
2342 raidPtr = req->raidPtr;
2343 raidPtr->recon_in_progress = 1;
2344 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2345 RF_Free(req, sizeof(*req));
2346 raidPtr->recon_in_progress = 0;
2347 splx(s);
2348
2349 /* That's all... */
2350 kthread_exit(0); /* does not return */
2351 }
2352