rf_netbsdkintf.c revision 1.42 1 /* $NetBSD: rf_netbsdkintf.c,v 1.42 2000/01/08 01:52:42 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
/* Runtime debug verbosity; higher values enable more db*_printf output. */
int     rf_kdebug_level = 0;

#ifdef DEBUG
#define db0_printf(a) printf a
#define db_printf(a) if (rf_kdebug_level > 0) printf a
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#define db2_printf(a) if (rf_kdebug_level > 1) printf a
#define db3_printf(a) if (rf_kdebug_level > 2) printf a
#define db4_printf(a) if (rf_kdebug_level > 3) printf a
#define db5_printf(a) if (rf_kdebug_level > 4) printf a
#else				/* DEBUG */
#define db0_printf(a) printf a
/*
 * db_printf was only defined in the DEBUG case; any use of it with
 * DEBUG off failed to compile.  Define it as a no-op here like the
 * other levels.
 */
#define db_printf(a) { }
#define db1_printf(a) { }
#define db2_printf(a) { }
#define db3_printf(a) { }
#define db4_printf(a) { }
#define db5_printf(a) { }
#endif				/* DEBUG */
172
/* Per-unit RAIDframe descriptors; allocated and filled in raidattach(). */
static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

/* Protects the two spare-table queues below. */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */
181
182 /* prototypes */
183 static void KernelWakeupFunc(struct buf * bp);
184 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
185 dev_t dev, RF_SectorNum_t startSect,
186 RF_SectorCount_t numSect, caddr_t buf,
187 void (*cbFunc) (struct buf *), void *cbArg,
188 int logBytesPerSector, struct proc * b_proc);
189
190 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
191 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
192
193 void raidattach __P((int));
194 int raidsize __P((dev_t));
195
196 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
197 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
198 static int raidinit __P((dev_t, RF_Raid_t *, int));
199
200 int raidopen __P((dev_t, int, int, struct proc *));
201 int raidclose __P((dev_t, int, int, struct proc *));
202 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
203 int raidwrite __P((dev_t, struct uio *, int));
204 int raidread __P((dev_t, struct uio *, int));
205 void raidstrategy __P((struct buf *));
206 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
207
208 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
209 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
210 void rf_update_component_labels( RF_Raid_t *);
211 /*
212 * Pilfered from ccd.c
213 */
214
/*
 * Per-component I/O wrapper: one of these is allocated from the unit's
 * sc_cbufpool for each buf sent down to a component disk.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};

/* Allocate/release a struct raidbuf from the per-unit pool (no sleeping). */
#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)

/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that... */

/* Per-unit software state; one entry per configured RAID device. */
struct raid_softc {
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;	/* size of the raid device */
	dev_t   sc_dev;		/* our device.. */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Map a device minor number to a RAID unit number. */
#define	raidunit(x)	DISKUNIT(x)
static int numraid = 0;		/* number of attached units; set in raidattach() */
248
249 /*
250 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
251 * Be aware that large numbers can allow the driver to consume a lot of
252 * kernel memory, especially on writes, and in degraded mode reads.
253 *
254 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
255 * a single 64K write will typically require 64K for the old data,
256 * 64K for the old parity, and 64K for the new parity, for a total
257 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
259 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
260 *
261 * Now in degraded mode, for example, a 64K read on the above setup may
262 * require data reconstruction, which will require *all* of the 4 remaining
263 * disks to participate -- 4 * 32K/disk == 128K again.
264 */
265
/* Maximum simultaneous I/Os allowed per RAID device; see memory-usage
 * discussion above.  Overridable from the kernel config. */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* The raw-partition device node used for disklabel I/O on this unit. */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;
275
276 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
277 struct disklabel *));
278 static void raidgetdisklabel __P((dev_t));
279 static void raidmakedisklabel __P((struct raid_softc *));
280
281 static int raidlock __P((struct raid_softc *));
282 static void raidunlock __P((struct raid_softc *));
283 int raidlookup __P((char *, struct proc * p, struct vnode **));
284
285 static void rf_markalldirty __P((RF_Raid_t *));
286
287 void rf_ReconThread __P((struct rf_recon_req *));
288 /* XXX what I want is: */
289 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
290 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
291 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
292 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
293
294 void
295 raidattach(num)
296 int num;
297 {
298 int raidID;
299 int i, rc;
300
301 #ifdef DEBUG
302 printf("raidattach: Asked for %d units\n", num);
303 #endif
304
305 if (num <= 0) {
306 #ifdef DIAGNOSTIC
307 panic("raidattach: count <= 0");
308 #endif
309 return;
310 }
311 /* This is where all the initialization stuff gets done. */
312
313 /* Make some space for requested number of units... */
314
315 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
316 if (raidPtrs == NULL) {
317 panic("raidPtrs is NULL!!\n");
318 }
319
320 rc = rf_mutex_init(&rf_sparet_wait_mutex);
321 if (rc) {
322 RF_PANIC();
323 }
324
325 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
326
327 for (i = 0; i < numraid; i++)
328 raidPtrs[i] = NULL;
329 rc = rf_BootRaidframe();
330 if (rc == 0)
331 printf("Kernelized RAIDframe activated\n");
332 else
333 panic("Serious error booting RAID!!\n");
334
335 /* put together some datastructures like the CCD device does.. This
336 * lets us lock the device and what-not when it gets opened. */
337
338 raid_softc = (struct raid_softc *)
339 malloc(num * sizeof(struct raid_softc),
340 M_RAIDFRAME, M_NOWAIT);
341 if (raid_softc == NULL) {
342 printf("WARNING: no memory for RAIDframe driver\n");
343 return;
344 }
345 numraid = num;
346 bzero(raid_softc, num * sizeof(struct raid_softc));
347
348 for (raidID = 0; raidID < num; raidID++) {
349 raid_softc[raidID].buf_queue.b_actf = NULL;
350 raid_softc[raidID].buf_queue.b_actb =
351 &raid_softc[raidID].buf_queue.b_actf;
352 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
353 (RF_Raid_t *));
354 if (raidPtrs[raidID] == NULL) {
355 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
356 numraid = raidID;
357 return;
358 }
359 }
360 }
361
362
363 int
364 raidsize(dev)
365 dev_t dev;
366 {
367 struct raid_softc *rs;
368 struct disklabel *lp;
369 int part, unit, omask, size;
370
371 unit = raidunit(dev);
372 if (unit >= numraid)
373 return (-1);
374 rs = &raid_softc[unit];
375
376 if ((rs->sc_flags & RAIDF_INITED) == 0)
377 return (-1);
378
379 part = DISKPART(dev);
380 omask = rs->sc_dkdev.dk_openmask & (1 << part);
381 lp = rs->sc_dkdev.dk_label;
382
383 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
384 return (-1);
385
386 if (lp->d_partitions[part].p_fstype != FS_SWAP)
387 size = -1;
388 else
389 size = lp->d_partitions[part].p_size *
390 (lp->d_secsize / DEV_BSIZE);
391
392 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
393 return (-1);
394
395 return (size);
396
397 }
398
399 int
400 raiddump(dev, blkno, va, size)
401 dev_t dev;
402 daddr_t blkno;
403 caddr_t va;
404 size_t size;
405 {
406 /* Not implemented. */
407 return ENXIO;
408 }
409 /* ARGSUSED */
/*
 * raidopen: open a partition of a RAID unit.  Takes the unit lock,
 * (re)reads the disklabel on the first open of a configured unit,
 * validates the partition, records the open in the char/block open
 * masks, and marks components dirty on the very first open.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against configuration/unconfiguration. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));

	/* First open of a configured unit: refresh the in-core label. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */
	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		/* Note: lp is only dereferenced when the unit is
		 * configured, thanks to the short-circuit below. */
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(raidPtrs[unit]);
	}

	/* dk_openmask is the union of the char and block open masks. */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);
}
488 /* ARGSUSED */
489 int
490 raidclose(dev, flags, fmt, p)
491 dev_t dev;
492 int flags, fmt;
493 struct proc *p;
494 {
495 int unit = raidunit(dev);
496 struct raid_softc *rs;
497 int error = 0;
498 int part;
499
500 if (unit >= numraid)
501 return (ENXIO);
502 rs = &raid_softc[unit];
503
504 if ((error = raidlock(rs)) != 0)
505 return (error);
506
507 part = DISKPART(dev);
508
509 /* ...that much closer to allowing unconfiguration... */
510 switch (fmt) {
511 case S_IFCHR:
512 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
513 break;
514
515 case S_IFBLK:
516 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
517 break;
518 }
519 rs->sc_dkdev.dk_openmask =
520 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
521
522 if ((rs->sc_dkdev.dk_openmask == 0) &&
523 ((rs->sc_flags & RAIDF_INITED) != 0)) {
524 /* Last one... device is not unconfigured yet.
525 Device shutdown has taken care of setting the
526 clean bits if RAIDF_INITED is not set
527 mark things as clean... */
528 rf_update_component_labels( raidPtrs[unit] );
529 }
530
531 raidunlock(rs);
532 return (0);
533
534 }
535
536 void
537 raidstrategy(bp)
538 register struct buf *bp;
539 {
540 register int s;
541
542 unsigned int raidID = raidunit(bp->b_dev);
543 RF_Raid_t *raidPtr;
544 struct raid_softc *rs = &raid_softc[raidID];
545 struct disklabel *lp;
546 struct buf *dp;
547 int wlabel;
548
549 if ((rs->sc_flags & RAIDF_INITED) ==0) {
550 bp->b_error = ENXIO;
551 bp->b_flags = B_ERROR;
552 bp->b_resid = bp->b_bcount;
553 biodone(bp);
554 return;
555 }
556 if (raidID >= numraid || !raidPtrs[raidID]) {
557 bp->b_error = ENODEV;
558 bp->b_flags |= B_ERROR;
559 bp->b_resid = bp->b_bcount;
560 biodone(bp);
561 return;
562 }
563 raidPtr = raidPtrs[raidID];
564 if (!raidPtr->valid) {
565 bp->b_error = ENODEV;
566 bp->b_flags |= B_ERROR;
567 bp->b_resid = bp->b_bcount;
568 biodone(bp);
569 return;
570 }
571 if (bp->b_bcount == 0) {
572 db1_printf(("b_bcount is zero..\n"));
573 biodone(bp);
574 return;
575 }
576 lp = rs->sc_dkdev.dk_label;
577
578 /*
579 * Do bounds checking and adjust transfer. If there's an
580 * error, the bounds check will flag that for us.
581 */
582
583 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
584 if (DISKPART(bp->b_dev) != RAW_PART)
585 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
586 db1_printf(("Bounds check failed!!:%d %d\n",
587 (int) bp->b_blkno, (int) wlabel));
588 biodone(bp);
589 return;
590 }
591 s = splbio();
592
593 bp->b_resid = 0;
594
595 /* stuff it onto our queue */
596
597 dp = &rs->buf_queue;
598 bp->b_actf = NULL;
599 bp->b_actb = dp->b_actb;
600 *dp->b_actb = bp;
601 dp->b_actb = &bp->b_actf;
602
603 raidstart(raidPtrs[raidID]);
604
605 splx(s);
606 }
607 /* ARGSUSED */
608 int
609 raidread(dev, uio, flags)
610 dev_t dev;
611 struct uio *uio;
612 int flags;
613 {
614 int unit = raidunit(dev);
615 struct raid_softc *rs;
616 int part;
617
618 if (unit >= numraid)
619 return (ENXIO);
620 rs = &raid_softc[unit];
621
622 if ((rs->sc_flags & RAIDF_INITED) == 0)
623 return (ENXIO);
624 part = DISKPART(dev);
625
626 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
627
628 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
629
630 }
631 /* ARGSUSED */
632 int
633 raidwrite(dev, uio, flags)
634 dev_t dev;
635 struct uio *uio;
636 int flags;
637 {
638 int unit = raidunit(dev);
639 struct raid_softc *rs;
640
641 if (unit >= numraid)
642 return (ENXIO);
643 rs = &raid_softc[unit];
644
645 if ((rs->sc_flags & RAIDF_INITED) == 0)
646 return (ENXIO);
647 db1_printf(("raidwrite\n"));
648 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
649
650 }
651
652 int
653 raidioctl(dev, cmd, data, flag, p)
654 dev_t dev;
655 u_long cmd;
656 caddr_t data;
657 int flag;
658 struct proc *p;
659 {
660 int unit = raidunit(dev);
661 int error = 0;
662 int part, pmask;
663 struct raid_softc *rs;
664 RF_Config_t *k_cfg, *u_cfg;
665 RF_Raid_t *raidPtr;
666 RF_AccTotals_t *totals;
667 RF_DeviceConfig_t *d_cfg, **ucfgp;
668 u_char *specific_buf;
669 int retcode = 0;
670 int row;
671 int column;
672 struct rf_recon_req *rrcopy, *rr;
673 RF_ComponentLabel_t *component_label;
674 RF_ComponentLabel_t ci_label;
675 RF_ComponentLabel_t **c_label_ptr;
676 RF_SingleComponent_t *sparePtr,*componentPtr;
677 RF_SingleComponent_t hot_spare;
678 RF_SingleComponent_t component;
679 int i, j, d;
680
681 if (unit >= numraid)
682 return (ENXIO);
683 rs = &raid_softc[unit];
684 raidPtr = raidPtrs[unit];
685
686 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
687 (int) DISKPART(dev), (int) unit, (int) cmd));
688
689 /* Must be open for writes for these commands... */
690 switch (cmd) {
691 case DIOCSDINFO:
692 case DIOCWDINFO:
693 case DIOCWLABEL:
694 if ((flag & FWRITE) == 0)
695 return (EBADF);
696 }
697
698 /* Must be initialized for these... */
699 switch (cmd) {
700 case DIOCGDINFO:
701 case DIOCSDINFO:
702 case DIOCWDINFO:
703 case DIOCGPART:
704 case DIOCWLABEL:
705 case DIOCGDEFLABEL:
706 case RAIDFRAME_SHUTDOWN:
707 case RAIDFRAME_REWRITEPARITY:
708 case RAIDFRAME_GET_INFO:
709 case RAIDFRAME_RESET_ACCTOTALS:
710 case RAIDFRAME_GET_ACCTOTALS:
711 case RAIDFRAME_KEEP_ACCTOTALS:
712 case RAIDFRAME_GET_SIZE:
713 case RAIDFRAME_FAIL_DISK:
714 case RAIDFRAME_COPYBACK:
715 case RAIDFRAME_CHECK_RECON_STATUS:
716 case RAIDFRAME_GET_COMPONENT_LABEL:
717 case RAIDFRAME_SET_COMPONENT_LABEL:
718 case RAIDFRAME_ADD_HOT_SPARE:
719 case RAIDFRAME_REMOVE_HOT_SPARE:
720 case RAIDFRAME_INIT_LABELS:
721 case RAIDFRAME_REBUILD_IN_PLACE:
722 case RAIDFRAME_CHECK_PARITY:
723 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
724 case RAIDFRAME_CHECK_COPYBACK_STATUS:
725 if ((rs->sc_flags & RAIDF_INITED) == 0)
726 return (ENXIO);
727 }
728
729 switch (cmd) {
730
731
732 /* configure the system */
733 case RAIDFRAME_CONFIGURE:
734
735 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
736 /* copy-in the configuration information */
737 /* data points to a pointer to the configuration structure */
738 u_cfg = *((RF_Config_t **) data);
739 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
740 if (k_cfg == NULL) {
741 return (ENOMEM);
742 }
743 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
744 sizeof(RF_Config_t));
745 if (retcode) {
746 RF_Free(k_cfg, sizeof(RF_Config_t));
747 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
748 retcode));
749 return (retcode);
750 }
751 /* allocate a buffer for the layout-specific data, and copy it
752 * in */
753 if (k_cfg->layoutSpecificSize) {
754 if (k_cfg->layoutSpecificSize > 10000) {
755 /* sanity check */
756 RF_Free(k_cfg, sizeof(RF_Config_t));
757 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
758 return (EINVAL);
759 }
760 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
761 (u_char *));
762 if (specific_buf == NULL) {
763 RF_Free(k_cfg, sizeof(RF_Config_t));
764 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
765 return (ENOMEM);
766 }
767 retcode = copyin(k_cfg->layoutSpecific,
768 (caddr_t) specific_buf,
769 k_cfg->layoutSpecificSize);
770 if (retcode) {
771 RF_Free(k_cfg, sizeof(RF_Config_t));
772 RF_Free(specific_buf,
773 k_cfg->layoutSpecificSize);
774 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
775 retcode));
776 return (retcode);
777 }
778 } else
779 specific_buf = NULL;
780 k_cfg->layoutSpecific = specific_buf;
781
782 /* should do some kind of sanity check on the configuration.
783 * Store the sum of all the bytes in the last byte? */
784
785 /* configure the system */
786
787 raidPtr->raidid = unit;
788
789 retcode = rf_Configure(raidPtr, k_cfg);
790
791 if (retcode == 0) {
792
793 /* allow this many simultaneous IO's to
794 this RAID device */
795 raidPtr->openings = RAIDOUTSTANDING;
796
797 /* XXX should be moved to rf_Configure() */
798
799 raidPtr->copyback_in_progress = 0;
800 raidPtr->parity_rewrite_in_progress = 0;
801 raidPtr->recon_in_progress = 0;
802
803 retcode = raidinit(dev, raidPtr, unit);
804 rf_markalldirty( raidPtr );
805 }
806 /* free the buffers. No return code here. */
807 if (k_cfg->layoutSpecificSize) {
808 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
809 }
810 RF_Free(k_cfg, sizeof(RF_Config_t));
811
812 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
813 retcode));
814
815 return (retcode);
816
817 /* shutdown the system */
818 case RAIDFRAME_SHUTDOWN:
819
820 if ((error = raidlock(rs)) != 0)
821 return (error);
822
823 /*
824 * If somebody has a partition mounted, we shouldn't
825 * shutdown.
826 */
827
828 part = DISKPART(dev);
829 pmask = (1 << part);
830 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
831 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
832 (rs->sc_dkdev.dk_copenmask & pmask))) {
833 raidunlock(rs);
834 return (EBUSY);
835 }
836
837 retcode = rf_Shutdown(raidPtr);
838
839 pool_destroy(&rs->sc_cbufpool);
840
841 /* It's no longer initialized... */
842 rs->sc_flags &= ~RAIDF_INITED;
843
844 /* Detach the disk. */
845 disk_detach(&rs->sc_dkdev);
846
847 raidunlock(rs);
848
849 return (retcode);
850 case RAIDFRAME_GET_COMPONENT_LABEL:
851 c_label_ptr = (RF_ComponentLabel_t **) data;
852 /* need to read the component label for the disk indicated
853 by row,column in component_label
854 XXX need to sanity check these values!!!
855 */
856
		/* For practice, let's get it directly from disk, rather
		   than from the in-core copy */
859 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
860 (RF_ComponentLabel_t *));
861 if (component_label == NULL)
862 return (ENOMEM);
863
864 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
865
866 retcode = copyin( *c_label_ptr, component_label,
867 sizeof(RF_ComponentLabel_t));
868
869 if (retcode) {
870 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
871 return(retcode);
872 }
873
874 row = component_label->row;
875 column = component_label->column;
876
877 if ((row < 0) || (row >= raidPtr->numRow) ||
878 (column < 0) || (column >= raidPtr->numCol)) {
879 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
880 return(EINVAL);
881 }
882
883 raidread_component_label(
884 raidPtr->Disks[row][column].dev,
885 raidPtr->raid_cinfo[row][column].ci_vp,
886 component_label );
887
888 retcode = copyout((caddr_t) component_label,
889 (caddr_t) *c_label_ptr,
890 sizeof(RF_ComponentLabel_t));
891 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
892 return (retcode);
893
894 case RAIDFRAME_SET_COMPONENT_LABEL:
895 component_label = (RF_ComponentLabel_t *) data;
896
897 /* XXX check the label for valid stuff... */
898 /* Note that some things *should not* get modified --
899 the user should be re-initing the labels instead of
900 trying to patch things.
901 */
902
903 printf("Got component label:\n");
904 printf("Version: %d\n",component_label->version);
905 printf("Serial Number: %d\n",component_label->serial_number);
906 printf("Mod counter: %d\n",component_label->mod_counter);
907 printf("Row: %d\n", component_label->row);
908 printf("Column: %d\n", component_label->column);
909 printf("Num Rows: %d\n", component_label->num_rows);
910 printf("Num Columns: %d\n", component_label->num_columns);
911 printf("Clean: %d\n", component_label->clean);
912 printf("Status: %d\n", component_label->status);
913
914 row = component_label->row;
915 column = component_label->column;
916
917 if ((row < 0) || (row >= raidPtr->numRow) ||
918 (column < 0) || (column >= raidPtr->numCol)) {
919 return(EINVAL);
920 }
921
922 /* XXX this isn't allowed to do anything for now :-) */
923 #if 0
924 raidwrite_component_label(
925 raidPtr->Disks[row][column].dev,
926 raidPtr->raid_cinfo[row][column].ci_vp,
927 component_label );
928 #endif
929 return (0);
930
931 case RAIDFRAME_INIT_LABELS:
932 component_label = (RF_ComponentLabel_t *) data;
933 /*
934 we only want the serial number from
935 the above. We get all the rest of the information
936 from the config that was used to create this RAID
937 set.
938 */
939
940 raidPtr->serial_number = component_label->serial_number;
941 /* current version number */
942 ci_label.version = RF_COMPONENT_LABEL_VERSION;
943 ci_label.serial_number = component_label->serial_number;
944 ci_label.mod_counter = raidPtr->mod_counter;
945 ci_label.num_rows = raidPtr->numRow;
946 ci_label.num_columns = raidPtr->numCol;
947 ci_label.clean = RF_RAID_DIRTY; /* not clean */
948 ci_label.status = rf_ds_optimal; /* "It's good!" */
949
950 for(row=0;row<raidPtr->numRow;row++) {
951 ci_label.row = row;
952 for(column=0;column<raidPtr->numCol;column++) {
953 ci_label.column = column;
954 raidwrite_component_label(
955 raidPtr->Disks[row][column].dev,
956 raidPtr->raid_cinfo[row][column].ci_vp,
957 &ci_label );
958 }
959 }
960
961 return (retcode);
962
963 /* initialize all parity */
964 case RAIDFRAME_REWRITEPARITY:
965
966 if (raidPtr->Layout.map->faultsTolerated == 0) {
967 /* Parity for RAID 0 is trivially correct */
968 raidPtr->parity_good = RF_RAID_CLEAN;
969 return(0);
970 }
971
972 if (raidPtr->parity_rewrite_in_progress == 1) {
973 /* Re-write is already in progress! */
974 return(EINVAL);
975 }
976
977 /* borrow the thread of the requesting process */
978
979 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
980 rf_RewriteParityThread,
981 raidPtr,"raid_parity");
982 return (retcode);
983
984
985 case RAIDFRAME_ADD_HOT_SPARE:
986 sparePtr = (RF_SingleComponent_t *) data;
987 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
988 printf("Adding spare\n");
989 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
990 return(retcode);
991
992 case RAIDFRAME_REMOVE_HOT_SPARE:
993 return(retcode);
994
995 case RAIDFRAME_REBUILD_IN_PLACE:
996
997 if (raidPtr->Layout.map->faultsTolerated == 0) {
998 /* Can't do this on a RAID 0!! */
999 return(EINVAL);
1000 }
1001
1002 if (raidPtr->recon_in_progress == 1) {
1003 /* a reconstruct is already in progress! */
1004 return(EINVAL);
1005 }
1006
1007 componentPtr = (RF_SingleComponent_t *) data;
1008 memcpy( &component, componentPtr,
1009 sizeof(RF_SingleComponent_t));
1010 row = component.row;
1011 column = component.column;
1012 printf("Rebuild: %d %d\n",row, column);
1013 if ((row < 0) || (row >= raidPtr->numRow) ||
1014 (column < 0) || (column >= raidPtr->numCol)) {
1015 return(EINVAL);
1016 }
1017
1018 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1019 if (rrcopy == NULL)
1020 return(ENOMEM);
1021
1022 rrcopy->raidPtr = (void *) raidPtr;
1023 rrcopy->row = row;
1024 rrcopy->col = column;
1025
1026 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1027 rf_ReconstructInPlaceThread,
1028 rrcopy,"raid_reconip");
1029 return(retcode);
1030
1031 case RAIDFRAME_GET_INFO:
1032 if (!raidPtr->valid)
1033 return (ENODEV);
1034 ucfgp = (RF_DeviceConfig_t **) data;
1035 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1036 (RF_DeviceConfig_t *));
1037 if (d_cfg == NULL)
1038 return (ENOMEM);
1039 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1040 d_cfg->rows = raidPtr->numRow;
1041 d_cfg->cols = raidPtr->numCol;
1042 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1043 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1044 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1045 return (ENOMEM);
1046 }
1047 d_cfg->nspares = raidPtr->numSpare;
1048 if (d_cfg->nspares >= RF_MAX_DISKS) {
1049 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1050 return (ENOMEM);
1051 }
1052 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1053 d = 0;
1054 for (i = 0; i < d_cfg->rows; i++) {
1055 for (j = 0; j < d_cfg->cols; j++) {
1056 d_cfg->devs[d] = raidPtr->Disks[i][j];
1057 d++;
1058 }
1059 }
1060 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1061 d_cfg->spares[i] = raidPtr->Disks[0][j];
1062 }
1063 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1064 sizeof(RF_DeviceConfig_t));
1065 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1066
1067 return (retcode);
1068
1069 case RAIDFRAME_CHECK_PARITY:
1070 *(int *) data = raidPtr->parity_good;
1071 return (0);
1072
1073 case RAIDFRAME_RESET_ACCTOTALS:
1074 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1075 return (0);
1076
1077 case RAIDFRAME_GET_ACCTOTALS:
1078 totals = (RF_AccTotals_t *) data;
1079 *totals = raidPtr->acc_totals;
1080 return (0);
1081
1082 case RAIDFRAME_KEEP_ACCTOTALS:
1083 raidPtr->keep_acc_totals = *(int *)data;
1084 return (0);
1085
1086 case RAIDFRAME_GET_SIZE:
1087 *(int *) data = raidPtr->totalSectors;
1088 return (0);
1089
1090 /* fail a disk & optionally start reconstruction */
1091 case RAIDFRAME_FAIL_DISK:
1092
1093 if (raidPtr->Layout.map->faultsTolerated == 0) {
1094 /* Can't do this on a RAID 0!! */
1095 return(EINVAL);
1096 }
1097
1098 rr = (struct rf_recon_req *) data;
1099
1100 if (rr->row < 0 || rr->row >= raidPtr->numRow
1101 || rr->col < 0 || rr->col >= raidPtr->numCol)
1102 return (EINVAL);
1103
1104 printf("raid%d: Failing the disk: row: %d col: %d\n",
1105 unit, rr->row, rr->col);
1106
1107 /* make a copy of the recon request so that we don't rely on
1108 * the user's buffer */
1109 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1110 if (rrcopy == NULL)
1111 return(ENOMEM);
1112 bcopy(rr, rrcopy, sizeof(*rr));
1113 rrcopy->raidPtr = (void *) raidPtr;
1114
1115 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1116 rf_ReconThread,
1117 rrcopy,"raid_recon");
1118 return (0);
1119
1120 /* invoke a copyback operation after recon on whatever disk
1121 * needs it, if any */
1122 case RAIDFRAME_COPYBACK:
1123
1124 if (raidPtr->Layout.map->faultsTolerated == 0) {
1125 /* This makes no sense on a RAID 0!! */
1126 return(EINVAL);
1127 }
1128
1129 if (raidPtr->copyback_in_progress == 1) {
1130 /* Copyback is already in progress! */
1131 return(EINVAL);
1132 }
1133
1134 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1135 rf_CopybackThread,
1136 raidPtr,"raid_copyback");
1137 return (retcode);
1138
1139 /* return the percentage completion of reconstruction */
1140 case RAIDFRAME_CHECK_RECON_STATUS:
1141 if (raidPtr->Layout.map->faultsTolerated == 0) {
1142 /* This makes no sense on a RAID 0 */
1143 return(EINVAL);
1144 }
1145 row = 0; /* XXX we only consider a single row... */
1146 if (raidPtr->status[row] != rf_rs_reconstructing)
1147 *(int *) data = 100;
1148 else
1149 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1150 return (0);
1151
1152 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1153 if (raidPtr->Layout.map->faultsTolerated == 0) {
1154 /* This makes no sense on a RAID 0 */
1155 return(EINVAL);
1156 }
1157 if (raidPtr->parity_rewrite_in_progress == 1) {
1158 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1159 } else {
1160 *(int *) data = 100;
1161 }
1162 return (0);
1163
1164 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1165 if (raidPtr->Layout.map->faultsTolerated == 0) {
1166 /* This makes no sense on a RAID 0 */
1167 return(EINVAL);
1168 }
1169 if (raidPtr->copyback_in_progress == 1) {
1170 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1171 raidPtr->Layout.numStripe;
1172 } else {
1173 *(int *) data = 100;
1174 }
1175 return (0);
1176
1177
1178 /* the sparetable daemon calls this to wait for the kernel to
1179 * need a spare table. this ioctl does not return until a
1180 * spare table is needed. XXX -- calling mpsleep here in the
1181 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1182 * -- I should either compute the spare table in the kernel,
1183 * or have a different -- XXX XXX -- interface (a different
1184 * character device) for delivering the table -- XXX */
1185 #if 0
1186 case RAIDFRAME_SPARET_WAIT:
1187 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1188 while (!rf_sparet_wait_queue)
1189 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1190 waitreq = rf_sparet_wait_queue;
1191 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1192 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1193
1194 /* structure assignment */
1195 *((RF_SparetWait_t *) data) = *waitreq;
1196
1197 RF_Free(waitreq, sizeof(*waitreq));
1198 return (0);
1199
1200 /* wakes up a process waiting on SPARET_WAIT and puts an error
1201 * code in it that will cause the dameon to exit */
1202 case RAIDFRAME_ABORT_SPARET_WAIT:
1203 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1204 waitreq->fcol = -1;
1205 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1206 waitreq->next = rf_sparet_wait_queue;
1207 rf_sparet_wait_queue = waitreq;
1208 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1209 wakeup(&rf_sparet_wait_queue);
1210 return (0);
1211
1212 /* used by the spare table daemon to deliver a spare table
1213 * into the kernel */
1214 case RAIDFRAME_SEND_SPARET:
1215
1216 /* install the spare table */
1217 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1218
1219 /* respond to the requestor. the return status of the spare
1220 * table installation is passed in the "fcol" field */
1221 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1222 waitreq->fcol = retcode;
1223 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1224 waitreq->next = rf_sparet_resp_queue;
1225 rf_sparet_resp_queue = waitreq;
1226 wakeup(&rf_sparet_resp_queue);
1227 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1228
1229 return (retcode);
1230 #endif
1231
1232 default:
1233 break; /* fall through to the os-specific code below */
1234
1235 }
1236
1237 if (!raidPtr->valid)
1238 return (EINVAL);
1239
1240 /*
1241 * Add support for "regular" device ioctls here.
1242 */
1243
1244 switch (cmd) {
1245 case DIOCGDINFO:
1246 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1247 break;
1248
1249 case DIOCGPART:
1250 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1251 ((struct partinfo *) data)->part =
1252 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1253 break;
1254
1255 case DIOCWDINFO:
1256 case DIOCSDINFO:
1257 if ((error = raidlock(rs)) != 0)
1258 return (error);
1259
1260 rs->sc_flags |= RAIDF_LABELLING;
1261
1262 error = setdisklabel(rs->sc_dkdev.dk_label,
1263 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1264 if (error == 0) {
1265 if (cmd == DIOCWDINFO)
1266 error = writedisklabel(RAIDLABELDEV(dev),
1267 raidstrategy, rs->sc_dkdev.dk_label,
1268 rs->sc_dkdev.dk_cpulabel);
1269 }
1270 rs->sc_flags &= ~RAIDF_LABELLING;
1271
1272 raidunlock(rs);
1273
1274 if (error)
1275 return (error);
1276 break;
1277
1278 case DIOCWLABEL:
1279 if (*(int *) data != 0)
1280 rs->sc_flags |= RAIDF_WLABEL;
1281 else
1282 rs->sc_flags &= ~RAIDF_WLABEL;
1283 break;
1284
1285 case DIOCGDEFLABEL:
1286 raidgetdefaultlabel(raidPtr, rs,
1287 (struct disklabel *) data);
1288 break;
1289
1290 default:
1291 retcode = ENOTTY;
1292 }
1293 return (retcode);
1294
1295 }
1296
1297
1298 /* raidinit -- complete the rest of the initialization for the
1299 RAIDframe device. */
1300
1301
1302 static int
1303 raidinit(dev, raidPtr, unit)
1304 dev_t dev;
1305 RF_Raid_t *raidPtr;
1306 int unit;
1307 {
1308 int retcode;
1309 struct raid_softc *rs;
1310
1311 retcode = 0;
1312
1313 rs = &raid_softc[unit];
1314 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1315 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1316
1317
1318 /* XXX should check return code first... */
1319 rs->sc_flags |= RAIDF_INITED;
1320
1321 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1322
1323 rs->sc_dkdev.dk_name = rs->sc_xname;
1324
1325 /* disk_attach actually creates space for the CPU disklabel, among
1326 * other things, so it's critical to call this *BEFORE* we try putzing
1327 * with disklabels. */
1328
1329 disk_attach(&rs->sc_dkdev);
1330
1331 /* XXX There may be a weird interaction here between this, and
1332 * protectedSectors, as used in RAIDframe. */
1333
1334 rs->sc_size = raidPtr->totalSectors;
1335 rs->sc_dev = dev;
1336
1337 return (retcode);
1338 }
1339
1340 /* wake up the daemon & tell it to get us a spare table
1341 * XXX
1342 * the entries in the queues should be tagged with the raidPtr
1343 * so that in the extremely rare case that two recons happen at once,
1344 * we know for which device were requesting a spare table
1345 * XXX
1346 *
1347 * XXX This code is not currently used. GO
1348 */
1349 int
1350 rf_GetSpareTableFromDaemon(req)
1351 RF_SparetWait_t *req;
1352 {
1353 int retcode;
1354
1355 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1356 req->next = rf_sparet_wait_queue;
1357 rf_sparet_wait_queue = req;
1358 wakeup(&rf_sparet_wait_queue);
1359
1360 /* mpsleep unlocks the mutex */
1361 while (!rf_sparet_resp_queue) {
1362 tsleep(&rf_sparet_resp_queue, PRIBIO,
1363 "raidframe getsparetable", 0);
1364 }
1365 req = rf_sparet_resp_queue;
1366 rf_sparet_resp_queue = req->next;
1367 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1368
1369 retcode = req->fcol;
1370 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1371 * alloc'd */
1372 return (retcode);
1373 }
1374
/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Drains the per-unit buffer queue (rs->buf_queue) while the array still
 * has openings: each buf is unlinked from the queue, its partition-
 * relative block number is translated to an absolute raid address, the
 * request is bounds- and alignment-checked, and it is then submitted to
 * rf_DoAccess as non-blocking I/O.
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	struct buf *dp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Check to see if we're at the limit... */
	RF_LOCK_MUTEX(raidPtr->mutex);
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		dp = &rs->buf_queue;
		bp = dp->b_actf;
		if (bp == NULL) {
			/* nothing more to do */
			return;
		}

		/* update structures */
		/* unlink bp from the doubly-linked b_actf/b_actb queue,
		 * fixing up the queue-head back-pointer when bp was the
		 * last entry */
		dp = bp->b_actf;
		if (dp != NULL) {
			dp->b_actb = bp->b_actb;
		} else {
			rs->buf_queue.b_actb = bp->b_actb;
		}
		*bp->b_actb = dp;

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* pb is 1 when the byte count isn't a whole number of
		 * sectors; the "sum < ..." tests below catch wraparound */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/* fail requests that run off the end of the array */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* fail transfers that aren't sector-size multiples */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* consume one opening for this request */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* NOTE(review): retcode is not examined here; presumably
		 * failures are reported via the bp at completion time --
		 * confirm against rf_DoAccess */
		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
		    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
		    do_async, raid_addr, num_blocks,
		    bp->b_un.b_addr, bp, NULL, NULL,
		    RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);


		/* re-take the mutex for the openings test at the loop top */
		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1503
1504
1505
1506
/* invoke an I/O from kernel mode. Disk queue should be locked upon entry */

/* Dispatch one RF_DiskQueueData_t request to the underlying component:
 * wraps the request in a raidbuf from the per-unit pool, initializes the
 * embedded buf via InitBP, and pushes it down with VOP_STRATEGY.
 * Completion is delivered through KernelWakeupFunc.  Always returns 0. */
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;
	int s;

	s=0;
	/* s = splbio();*/ /* want to test this */
	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	/* remember which queue this request belongs to, for the
	 * completion handler */
	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	/* scrub stale error state from the incoming buf */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* get a raidbuf from the per-unit pool to carry this I/O */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* fake an immediate completion for the NOP */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* set up the embedded buf to target the component's
		 * vnode/device at the requested sector range */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must bump the vnode's output counter before
		 * being handed to the strategy routine */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	/* splx(s); */ /* want to test this */
	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.

   Runs at splbio: copies error/resid state from the component buf back
   to the original buf, updates trace statistics, marks the component
   failed on I/O error (once), returns the raidbuf to the pool, and
   notifies RAIDframe via rf_DiskIOComplete and the request's
   CompleteFunc.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate any component-level error to the original buf,
	 * defaulting to EIO when no specific error code was set */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	if (req->tracerec) {
		/* charge this physical I/O's elapsed time to the access
		 * trace record */
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* give the raidbuf back to the per-unit pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	}

	/* tell the disk queue, and through it RAIDframe, that this
	 * request has completed (second arg nonzero on failure) */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);

	splx(s);
}
1706
1707
1708
1709 /*
1710 * initialize a buf structure for doing an I/O in the kernel.
1711 */
1712 static void
1713 InitBP(
1714 struct buf * bp,
1715 struct vnode * b_vp,
1716 unsigned rw_flag,
1717 dev_t dev,
1718 RF_SectorNum_t startSect,
1719 RF_SectorCount_t numSect,
1720 caddr_t buf,
1721 void (*cbFunc) (struct buf *),
1722 void *cbArg,
1723 int logBytesPerSector,
1724 struct proc * b_proc)
1725 {
1726 /* bp->b_flags = B_PHYS | rw_flag; */
1727 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1728 bp->b_bcount = numSect << logBytesPerSector;
1729 bp->b_bufsize = bp->b_bcount;
1730 bp->b_error = 0;
1731 bp->b_dev = dev;
1732 bp->b_un.b_addr = buf;
1733 bp->b_blkno = startSect;
1734 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1735 if (bp->b_bcount == 0) {
1736 panic("bp->b_bcount is zero in InitBP!!\n");
1737 }
1738 bp->b_proc = b_proc;
1739 bp->b_iodone = cbFunc;
1740 bp->b_vp = b_vp;
1741
1742 }
1743
1744 static void
1745 raidgetdefaultlabel(raidPtr, rs, lp)
1746 RF_Raid_t *raidPtr;
1747 struct raid_softc *rs;
1748 struct disklabel *lp;
1749 {
1750 db1_printf(("Building a default label...\n"));
1751 bzero(lp, sizeof(*lp));
1752
1753 /* fabricate a label... */
1754 lp->d_secperunit = raidPtr->totalSectors;
1755 lp->d_secsize = raidPtr->bytesPerSector;
1756 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1757 lp->d_ntracks = 1;
1758 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1759 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1760
1761 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1762 lp->d_type = DTYPE_RAID;
1763 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1764 lp->d_rpm = 3600;
1765 lp->d_interleave = 1;
1766 lp->d_flags = 0;
1767
1768 lp->d_partitions[RAW_PART].p_offset = 0;
1769 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1770 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1771 lp->d_npartitions = RAW_PART + 1;
1772
1773 lp->d_magic = DISKMAGIC;
1774 lp->d_magic2 = DISKMAGIC;
1775 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1776
1777 }
1778 /*
1779 * Read the disklabel from the raid device. If one is not present, fake one
1780 * up.
1781 */
1782 static void
1783 raidgetdisklabel(dev)
1784 dev_t dev;
1785 {
1786 int unit = raidunit(dev);
1787 struct raid_softc *rs = &raid_softc[unit];
1788 char *errstring;
1789 struct disklabel *lp = rs->sc_dkdev.dk_label;
1790 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1791 RF_Raid_t *raidPtr;
1792
1793 db1_printf(("Getting the disklabel...\n"));
1794
1795 bzero(clp, sizeof(*clp));
1796
1797 raidPtr = raidPtrs[unit];
1798
1799 raidgetdefaultlabel(raidPtr, rs, lp);
1800
1801 /*
1802 * Call the generic disklabel extraction routine.
1803 */
1804 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1805 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1806 if (errstring)
1807 raidmakedisklabel(rs);
1808 else {
1809 int i;
1810 struct partition *pp;
1811
1812 /*
1813 * Sanity check whether the found disklabel is valid.
1814 *
1815 * This is necessary since total size of the raid device
1816 * may vary when an interleave is changed even though exactly
1817 * same componets are used, and old disklabel may used
1818 * if that is found.
1819 */
1820 if (lp->d_secperunit != rs->sc_size)
1821 printf("WARNING: %s: "
1822 "total sector size in disklabel (%d) != "
1823 "the size of raid (%ld)\n", rs->sc_xname,
1824 lp->d_secperunit, (long) rs->sc_size);
1825 for (i = 0; i < lp->d_npartitions; i++) {
1826 pp = &lp->d_partitions[i];
1827 if (pp->p_offset + pp->p_size > rs->sc_size)
1828 printf("WARNING: %s: end of partition `%c' "
1829 "exceeds the size of raid (%ld)\n",
1830 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1831 }
1832 }
1833
1834 }
1835 /*
1836 * Take care of things one might want to take care of in the event
1837 * that a disklabel isn't present.
1838 */
1839 static void
1840 raidmakedisklabel(rs)
1841 struct raid_softc *rs;
1842 {
1843 struct disklabel *lp = rs->sc_dkdev.dk_label;
1844 db1_printf(("Making a label..\n"));
1845
1846 /*
1847 * For historical reasons, if there's no disklabel present
1848 * the raw partition must be marked FS_BSDFFS.
1849 */
1850
1851 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1852
1853 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1854
1855 lp->d_checksum = dkcksum(lp);
1856 }
1857 /*
1858 * Lookup the provided name in the filesystem. If the file exists,
1859 * is a valid block device, and isn't being used by anyone else,
1860 * set *vpp to the file's vnode.
1861 * You'll find the original of this in ccd.c
1862 */
1863 int
1864 raidlookup(path, p, vpp)
1865 char *path;
1866 struct proc *p;
1867 struct vnode **vpp; /* result */
1868 {
1869 struct nameidata nd;
1870 struct vnode *vp;
1871 struct vattr va;
1872 int error;
1873
1874 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1875 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1876 #ifdef DEBUG
1877 printf("RAIDframe: vn_open returned %d\n", error);
1878 #endif
1879 return (error);
1880 }
1881 vp = nd.ni_vp;
1882 if (vp->v_usecount > 1) {
1883 VOP_UNLOCK(vp, 0);
1884 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1885 return (EBUSY);
1886 }
1887 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1888 VOP_UNLOCK(vp, 0);
1889 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1890 return (error);
1891 }
1892 /* XXX: eventually we should handle VREG, too. */
1893 if (va.va_type != VBLK) {
1894 VOP_UNLOCK(vp, 0);
1895 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1896 return (ENOTBLK);
1897 }
1898 VOP_UNLOCK(vp, 0);
1899 *vpp = vp;
1900 return (0);
1901 }
1902 /*
1903 * Wait interruptibly for an exclusive lock.
1904 *
1905 * XXX
1906 * Several drivers do this; it should be abstracted and made MP-safe.
1907 * (Hmm... where have we seen this warning before :-> GO )
1908 */
1909 static int
1910 raidlock(rs)
1911 struct raid_softc *rs;
1912 {
1913 int error;
1914
1915 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1916 rs->sc_flags |= RAIDF_WANTED;
1917 if ((error =
1918 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1919 return (error);
1920 }
1921 rs->sc_flags |= RAIDF_LOCKED;
1922 return (0);
1923 }
1924 /*
1925 * Unlock and wake up any waiters.
1926 */
1927 static void
1928 raidunlock(rs)
1929 struct raid_softc *rs;
1930 {
1931
1932 rs->sc_flags &= ~RAIDF_LOCKED;
1933 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
1934 rs->sc_flags &= ~RAIDF_WANTED;
1935 wakeup(rs);
1936 }
1937 }
1938
1939
1940 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
1941 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
1942
1943 int
1944 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
1945 {
1946 RF_ComponentLabel_t component_label;
1947 raidread_component_label(dev, b_vp, &component_label);
1948 component_label.mod_counter = mod_counter;
1949 component_label.clean = RF_RAID_CLEAN;
1950 raidwrite_component_label(dev, b_vp, &component_label);
1951 return(0);
1952 }
1953
1954
1955 int
1956 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
1957 {
1958 RF_ComponentLabel_t component_label;
1959 raidread_component_label(dev, b_vp, &component_label);
1960 component_label.mod_counter = mod_counter;
1961 component_label.clean = RF_RAID_DIRTY;
1962 raidwrite_component_label(dev, b_vp, &component_label);
1963 return(0);
1964 }
1965
1966 /* ARGSUSED */
1967 int
1968 raidread_component_label(dev, b_vp, component_label)
1969 dev_t dev;
1970 struct vnode *b_vp;
1971 RF_ComponentLabel_t *component_label;
1972 {
1973 struct buf *bp;
1974 int error;
1975
1976 /* XXX should probably ensure that we don't try to do this if
1977 someone has changed rf_protected_sectors. */
1978
1979 /* get a block of the appropriate size... */
1980 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
1981 bp->b_dev = dev;
1982
1983 /* get our ducks in a row for the read */
1984 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
1985 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
1986 bp->b_flags = B_BUSY | B_READ;
1987 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
1988
1989 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
1990
1991 error = biowait(bp);
1992
1993 if (!error) {
1994 memcpy(component_label, bp->b_un.b_addr,
1995 sizeof(RF_ComponentLabel_t));
1996 #if 0
1997 printf("raidread_component_label: got component label:\n");
1998 printf("Version: %d\n",component_label->version);
1999 printf("Serial Number: %d\n",component_label->serial_number);
2000 printf("Mod counter: %d\n",component_label->mod_counter);
2001 printf("Row: %d\n", component_label->row);
2002 printf("Column: %d\n", component_label->column);
2003 printf("Num Rows: %d\n", component_label->num_rows);
2004 printf("Num Columns: %d\n", component_label->num_columns);
2005 printf("Clean: %d\n", component_label->clean);
2006 printf("Status: %d\n", component_label->status);
2007 #endif
2008 } else {
2009 printf("Failed to read RAID component label!\n");
2010 }
2011
2012 bp->b_flags = B_INVAL | B_AGE;
2013 brelse(bp);
2014 return(error);
2015 }
2016 /* ARGSUSED */
2017 int
2018 raidwrite_component_label(dev, b_vp, component_label)
2019 dev_t dev;
2020 struct vnode *b_vp;
2021 RF_ComponentLabel_t *component_label;
2022 {
2023 struct buf *bp;
2024 int error;
2025
2026 /* get a block of the appropriate size... */
2027 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2028 bp->b_dev = dev;
2029
2030 /* get our ducks in a row for the write */
2031 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2032 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2033 bp->b_flags = B_BUSY | B_WRITE;
2034 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2035
2036 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2037
2038 memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));
2039
2040 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2041 error = biowait(bp);
2042 bp->b_flags = B_INVAL | B_AGE;
2043 brelse(bp);
2044 if (error) {
2045 printf("Failed to write RAID component info!\n");
2046 }
2047
2048 return(error);
2049 }
2050
/* Bump the array's mod_counter and mark the component label of every
 * non-failed component dirty via raidmarkdirty().  Components whose
 * label reports rf_ds_spared are left untouched.  The trailing #if 0
 * block (spare handling) is dead code and references variables not
 * declared in this function -- it will not compile if enabled. */
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
#if 0
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				 raidPtr->Disks[r][sparecol].dev,
				 raidPtr->raid_cinfo[r][sparecol].ci_vp,
				 &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
2145
2146
2147 void
2148 rf_update_component_labels( raidPtr )
2149 RF_Raid_t *raidPtr;
2150 {
2151 RF_ComponentLabel_t c_label;
2152 int sparecol;
2153 int r,c;
2154 int i,j;
2155 int srow, scol;
2156
2157 srow = -1;
2158 scol = -1;
2159
2160 /* XXX should do extra checks to make sure things really are clean,
2161 rather than blindly setting the clean bit... */
2162
2163 raidPtr->mod_counter++;
2164
2165 for (r = 0; r < raidPtr->numRow; r++) {
2166 for (c = 0; c < raidPtr->numCol; c++) {
2167 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2168 raidread_component_label(
2169 raidPtr->Disks[r][c].dev,
2170 raidPtr->raid_cinfo[r][c].ci_vp,
2171 &c_label);
2172 /* make sure status is noted */
2173 c_label.status = rf_ds_optimal;
2174 raidwrite_component_label(
2175 raidPtr->Disks[r][c].dev,
2176 raidPtr->raid_cinfo[r][c].ci_vp,
2177 &c_label);
2178 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2179 raidmarkclean(
2180 raidPtr->Disks[r][c].dev,
2181 raidPtr->raid_cinfo[r][c].ci_vp,
2182 raidPtr->mod_counter);
2183 }
2184 }
2185 /* else we don't touch it.. */
2186 #if 0
2187 else if (raidPtr->Disks[r][c].status !=
2188 rf_ds_failed) {
2189 raidread_component_label(
2190 raidPtr->Disks[r][c].dev,
2191 raidPtr->raid_cinfo[r][c].ci_vp,
2192 &c_label);
2193 /* make sure status is noted */
2194 c_label.status =
2195 raidPtr->Disks[r][c].status;
2196 raidwrite_component_label(
2197 raidPtr->Disks[r][c].dev,
2198 raidPtr->raid_cinfo[r][c].ci_vp,
2199 &c_label);
2200 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2201 raidmarkclean(
2202 raidPtr->Disks[r][c].dev,
2203 raidPtr->raid_cinfo[r][c].ci_vp,
2204 raidPtr->mod_counter);
2205 }
2206 }
2207 #endif
2208 }
2209 }
2210
2211 for( c = 0; c < raidPtr->numSpare ; c++) {
2212 sparecol = raidPtr->numCol + c;
2213 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2214 /*
2215
2216 we claim this disk is "optimal" if it's
2217 rf_ds_used_spare, as that means it should be
2218 directly substitutable for the disk it replaced.
2219 We note that too...
2220
2221 */
2222
2223 for(i=0;i<raidPtr->numRow;i++) {
2224 for(j=0;j<raidPtr->numCol;j++) {
2225 if ((raidPtr->Disks[i][j].spareRow ==
2226 0) &&
2227 (raidPtr->Disks[i][j].spareCol ==
2228 sparecol)) {
2229 srow = i;
2230 scol = j;
2231 break;
2232 }
2233 }
2234 }
2235
2236 raidread_component_label(
2237 raidPtr->Disks[0][sparecol].dev,
2238 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2239 &c_label);
2240 /* make sure status is noted */
2241 c_label.version = RF_COMPONENT_LABEL_VERSION;
2242 c_label.mod_counter = raidPtr->mod_counter;
2243 c_label.serial_number = raidPtr->serial_number;
2244 c_label.row = srow;
2245 c_label.column = scol;
2246 c_label.num_rows = raidPtr->numRow;
2247 c_label.num_columns = raidPtr->numCol;
2248 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2249 c_label.status = rf_ds_optimal;
2250 raidwrite_component_label(
2251 raidPtr->Disks[0][sparecol].dev,
2252 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2253 &c_label);
2254 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2255 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2256 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2257 raidPtr->mod_counter);
2258 }
2259 }
2260 }
2261 /* printf("Component labels updated\n"); */
2262 }
2263
2264 void
2265 rf_ReconThread(req)
2266 struct rf_recon_req *req;
2267 {
2268 int s;
2269 RF_Raid_t *raidPtr;
2270
2271 s = splbio();
2272 raidPtr = (RF_Raid_t *) req->raidPtr;
2273 raidPtr->recon_in_progress = 1;
2274
2275 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2276 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2277
2278 /* XXX get rid of this! we don't need it at all.. */
2279 RF_Free(req, sizeof(*req));
2280
2281 raidPtr->recon_in_progress = 0;
2282 splx(s);
2283
2284 /* That's all... */
2285 kthread_exit(0); /* does not return */
2286 }
2287
2288 void
2289 rf_RewriteParityThread(raidPtr)
2290 RF_Raid_t *raidPtr;
2291 {
2292 int retcode;
2293 int s;
2294
2295 raidPtr->parity_rewrite_in_progress = 1;
2296 s = splbio();
2297 retcode = rf_RewriteParity(raidPtr);
2298 splx(s);
2299 if (retcode) {
2300 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2301 } else {
2302 /* set the clean bit! If we shutdown correctly,
2303 the clean bit on each component label will get
2304 set */
2305 raidPtr->parity_good = RF_RAID_CLEAN;
2306 }
2307 raidPtr->parity_rewrite_in_progress = 0;
2308
2309 /* That's all... */
2310 kthread_exit(0); /* does not return */
2311 }
2312
2313
2314 void
2315 rf_CopybackThread(raidPtr)
2316 RF_Raid_t *raidPtr;
2317 {
2318 int s;
2319
2320 raidPtr->copyback_in_progress = 1;
2321 s = splbio();
2322 rf_CopybackReconstructedData(raidPtr);
2323 splx(s);
2324 raidPtr->copyback_in_progress = 0;
2325
2326 /* That's all... */
2327 kthread_exit(0); /* does not return */
2328 }
2329
2330
2331 void
2332 rf_ReconstructInPlaceThread(req)
2333 struct rf_recon_req *req;
2334 {
2335 int retcode;
2336 int s;
2337 RF_Raid_t *raidPtr;
2338
2339 s = splbio();
2340 raidPtr = req->raidPtr;
2341 raidPtr->recon_in_progress = 1;
2342 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2343 RF_Free(req, sizeof(*req));
2344 raidPtr->recon_in_progress = 0;
2345 splx(s);
2346
2347 /* That's all... */
2348 kthread_exit(0); /* does not return */
2349 }
2350