rf_netbsdkintf.c revision 1.43 1 /* $NetBSD: rf_netbsdkintf.c,v 1.43 2000/01/08 02:04:06 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
154 int rf_kdebug_level = 0;
155
156 #ifdef DEBUG
157 #define db0_printf(a) printf a
158 #define db_printf(a) if (rf_kdebug_level > 0) printf a
159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
160 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
161 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
162 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
163 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
164 #else /* DEBUG */
165 #define db0_printf(a) printf a
166 #define db1_printf(a) { }
167 #define db2_printf(a) { }
168 #define db3_printf(a) { }
169 #define db4_printf(a) { }
170 #define db5_printf(a) { }
171 #endif /* DEBUG */
172
173 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
174
175 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
176
177 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
178 * spare table */
179 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
180 * installation process */
181
182 /* prototypes */
183 static void KernelWakeupFunc(struct buf * bp);
184 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
185 dev_t dev, RF_SectorNum_t startSect,
186 RF_SectorCount_t numSect, caddr_t buf,
187 void (*cbFunc) (struct buf *), void *cbArg,
188 int logBytesPerSector, struct proc * b_proc);
189
190 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
191 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
192
193 void raidattach __P((int));
194 int raidsize __P((dev_t));
195
196 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
197 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
198 static int raidinit __P((dev_t, RF_Raid_t *, int));
199
200 int raidopen __P((dev_t, int, int, struct proc *));
201 int raidclose __P((dev_t, int, int, struct proc *));
202 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
203 int raidwrite __P((dev_t, struct uio *, int));
204 int raidread __P((dev_t, struct uio *, int));
205 void raidstrategy __P((struct buf *));
206 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
207
208 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
209 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
210 void rf_update_component_labels( RF_Raid_t *);
211 /*
212 * Pilfered from ccd.c
213 */
214
/*
 * Per-component I/O wrapper (pilfered from ccd.c): one of these is
 * allocated from the unit's pool for each struct buf issued to a
 * component disk on behalf of an original request.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int rf_flags;		/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};


/* Get/put a raidbuf from/to the per-unit component buffer pool. */
#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
225
/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that... */

/*
 * Per-unit driver state, one entry per configured RAID unit in the
 * raid_softc[] array allocated by raidattach().
 */
struct raid_softc {
	int sc_flags;		/* flags (RAIDF_*, below) */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	dev_t sc_dev;		/* our device.. */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Extract the unit number from a dev_t. */
#define raidunit(x)	DISKUNIT(x)
/* Number of units set up by raidattach(); 0 until attach succeeds. */
static int numraid = 0;
248
249 /*
250 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
251 * Be aware that large numbers can allow the driver to consume a lot of
252 * kernel memory, especially on writes, and in degraded mode reads.
253 *
254 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
255 * a single 64K write will typically require 64K for the old data,
256 * 64K for the old parity, and 64K for the new parity, for a total
257 * of 192K (if the parity buffer is not re-used immediately).
258  * Even if it is used immediately, that's still 128K, which when multiplied
259 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
260 *
261 * Now in degraded mode, for example, a 64K read on the above setup may
262 * require data reconstruction, which will require *all* of the 4 remaining
263 * disks to participate -- 4 * 32K/disk == 128K again.
264 */
265
266 #ifndef RAIDOUTSTANDING
267 #define RAIDOUTSTANDING 6
268 #endif
269
270 #define RAIDLABELDEV(dev) \
271 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
272
273 /* declared here, and made public, for the benefit of KVM stuff.. */
274 struct raid_softc *raid_softc;
275
276 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
277 struct disklabel *));
278 static void raidgetdisklabel __P((dev_t));
279 static void raidmakedisklabel __P((struct raid_softc *));
280
281 static int raidlock __P((struct raid_softc *));
282 static void raidunlock __P((struct raid_softc *));
283 int raidlookup __P((char *, struct proc * p, struct vnode **));
284
285 static void rf_markalldirty __P((RF_Raid_t *));
286
287 void rf_ReconThread __P((struct rf_recon_req *));
288 /* XXX what I want is: */
289 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
290 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
291 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
292 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
293
294 void
295 raidattach(num)
296 int num;
297 {
298 int raidID;
299 int i, rc;
300
301 #ifdef DEBUG
302 printf("raidattach: Asked for %d units\n", num);
303 #endif
304
305 if (num <= 0) {
306 #ifdef DIAGNOSTIC
307 panic("raidattach: count <= 0");
308 #endif
309 return;
310 }
311 /* This is where all the initialization stuff gets done. */
312
313 /* Make some space for requested number of units... */
314
315 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
316 if (raidPtrs == NULL) {
317 panic("raidPtrs is NULL!!\n");
318 }
319
320 rc = rf_mutex_init(&rf_sparet_wait_mutex);
321 if (rc) {
322 RF_PANIC();
323 }
324
325 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
326
327 for (i = 0; i < numraid; i++)
328 raidPtrs[i] = NULL;
329 rc = rf_BootRaidframe();
330 if (rc == 0)
331 printf("Kernelized RAIDframe activated\n");
332 else
333 panic("Serious error booting RAID!!\n");
334
335 /* put together some datastructures like the CCD device does.. This
336 * lets us lock the device and what-not when it gets opened. */
337
338 raid_softc = (struct raid_softc *)
339 malloc(num * sizeof(struct raid_softc),
340 M_RAIDFRAME, M_NOWAIT);
341 if (raid_softc == NULL) {
342 printf("WARNING: no memory for RAIDframe driver\n");
343 return;
344 }
345 numraid = num;
346 bzero(raid_softc, num * sizeof(struct raid_softc));
347
348 for (raidID = 0; raidID < num; raidID++) {
349 raid_softc[raidID].buf_queue.b_actf = NULL;
350 raid_softc[raidID].buf_queue.b_actb =
351 &raid_softc[raidID].buf_queue.b_actf;
352 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
353 (RF_Raid_t *));
354 if (raidPtrs[raidID] == NULL) {
355 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
356 numraid = raidID;
357 return;
358 }
359 }
360 }
361
362
363 int
364 raidsize(dev)
365 dev_t dev;
366 {
367 struct raid_softc *rs;
368 struct disklabel *lp;
369 int part, unit, omask, size;
370
371 unit = raidunit(dev);
372 if (unit >= numraid)
373 return (-1);
374 rs = &raid_softc[unit];
375
376 if ((rs->sc_flags & RAIDF_INITED) == 0)
377 return (-1);
378
379 part = DISKPART(dev);
380 omask = rs->sc_dkdev.dk_openmask & (1 << part);
381 lp = rs->sc_dkdev.dk_label;
382
383 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
384 return (-1);
385
386 if (lp->d_partitions[part].p_fstype != FS_SWAP)
387 size = -1;
388 else
389 size = lp->d_partitions[part].p_size *
390 (lp->d_secsize / DEV_BSIZE);
391
392 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
393 return (-1);
394
395 return (size);
396
397 }
398
399 int
400 raiddump(dev, blkno, va, size)
401 dev_t dev;
402 daddr_t blkno;
403 caddr_t va;
404 size_t size;
405 {
406 /* Not implemented. */
407 return ENXIO;
408 }
/*
 * raidopen: open entry point for the block and character devices.
 *
 * Validates the unit and partition, records the open in the per-mode
 * open masks (so the unit cannot be unconfigured while open), and on
 * the very first open of a configured set marks all components dirty
 * so that an unclean shutdown can later be detected.
 *
 * Returns 0 on success or an errno value (ENXIO for a bad unit or
 * nonexistent partition, or whatever raidlock() returns).
 */
/* ARGSUSED */
int
raidopen(dev, flags, fmt, p)
	dev_t   dev;
	int     flags, fmt;
	struct proc *p;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against unconfiguration and concurrent open/close. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
	    unit, part));


	/* First open of a configured set: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		    (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	/* Combined mask of all open partitions, both modes. */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
/*
 * raidclose: close entry point for the block and character devices.
 *
 * Clears the partition's bit from the relevant per-mode open mask and,
 * when the last partition of a configured set is closed, rewrites the
 * component labels to record a clean state.  Returns 0 once the unit
 * lock is obtained, or the raidlock() error otherwise.
 */
/* ARGSUSED */
int
raidclose(dev, flags, fmt, p)
	dev_t   dev;
	int     flags, fmt;
	struct proc *p;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	int     error = 0;
	int     part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against unconfiguration and concurrent open/close. */
	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */
		rf_update_component_labels( raidPtrs[unit] );
	}

	raidunlock(rs);
	return (0);

}
535
536 void
537 raidstrategy(bp)
538 register struct buf *bp;
539 {
540 register int s;
541
542 unsigned int raidID = raidunit(bp->b_dev);
543 RF_Raid_t *raidPtr;
544 struct raid_softc *rs = &raid_softc[raidID];
545 struct disklabel *lp;
546 struct buf *dp;
547 int wlabel;
548
549 if ((rs->sc_flags & RAIDF_INITED) ==0) {
550 bp->b_error = ENXIO;
551 bp->b_flags = B_ERROR;
552 bp->b_resid = bp->b_bcount;
553 biodone(bp);
554 return;
555 }
556 if (raidID >= numraid || !raidPtrs[raidID]) {
557 bp->b_error = ENODEV;
558 bp->b_flags |= B_ERROR;
559 bp->b_resid = bp->b_bcount;
560 biodone(bp);
561 return;
562 }
563 raidPtr = raidPtrs[raidID];
564 if (!raidPtr->valid) {
565 bp->b_error = ENODEV;
566 bp->b_flags |= B_ERROR;
567 bp->b_resid = bp->b_bcount;
568 biodone(bp);
569 return;
570 }
571 if (bp->b_bcount == 0) {
572 db1_printf(("b_bcount is zero..\n"));
573 biodone(bp);
574 return;
575 }
576 lp = rs->sc_dkdev.dk_label;
577
578 /*
579 * Do bounds checking and adjust transfer. If there's an
580 * error, the bounds check will flag that for us.
581 */
582
583 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
584 if (DISKPART(bp->b_dev) != RAW_PART)
585 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
586 db1_printf(("Bounds check failed!!:%d %d\n",
587 (int) bp->b_blkno, (int) wlabel));
588 biodone(bp);
589 return;
590 }
591 s = splbio();
592
593 bp->b_resid = 0;
594
595 /* stuff it onto our queue */
596
597 dp = &rs->buf_queue;
598 bp->b_actf = NULL;
599 bp->b_actb = dp->b_actb;
600 *dp->b_actb = bp;
601 dp->b_actb = &bp->b_actf;
602
603 raidstart(raidPtrs[raidID]);
604
605 splx(s);
606 }
607 /* ARGSUSED */
608 int
609 raidread(dev, uio, flags)
610 dev_t dev;
611 struct uio *uio;
612 int flags;
613 {
614 int unit = raidunit(dev);
615 struct raid_softc *rs;
616 int part;
617
618 if (unit >= numraid)
619 return (ENXIO);
620 rs = &raid_softc[unit];
621
622 if ((rs->sc_flags & RAIDF_INITED) == 0)
623 return (ENXIO);
624 part = DISKPART(dev);
625
626 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
627
628 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
629
630 }
631 /* ARGSUSED */
632 int
633 raidwrite(dev, uio, flags)
634 dev_t dev;
635 struct uio *uio;
636 int flags;
637 {
638 int unit = raidunit(dev);
639 struct raid_softc *rs;
640
641 if (unit >= numraid)
642 return (ENXIO);
643 rs = &raid_softc[unit];
644
645 if ((rs->sc_flags & RAIDF_INITED) == 0)
646 return (ENXIO);
647 db1_printf(("raidwrite\n"));
648 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
649
650 }
651
652 int
653 raidioctl(dev, cmd, data, flag, p)
654 dev_t dev;
655 u_long cmd;
656 caddr_t data;
657 int flag;
658 struct proc *p;
659 {
660 int unit = raidunit(dev);
661 int error = 0;
662 int part, pmask;
663 struct raid_softc *rs;
664 RF_Config_t *k_cfg, *u_cfg;
665 RF_Raid_t *raidPtr;
666 RF_AccTotals_t *totals;
667 RF_DeviceConfig_t *d_cfg, **ucfgp;
668 u_char *specific_buf;
669 int retcode = 0;
670 int row;
671 int column;
672 struct rf_recon_req *rrcopy, *rr;
673 RF_ComponentLabel_t *component_label;
674 RF_ComponentLabel_t ci_label;
675 RF_ComponentLabel_t **c_label_ptr;
676 RF_SingleComponent_t *sparePtr,*componentPtr;
677 RF_SingleComponent_t hot_spare;
678 RF_SingleComponent_t component;
679 int i, j, d;
680
681 if (unit >= numraid)
682 return (ENXIO);
683 rs = &raid_softc[unit];
684 raidPtr = raidPtrs[unit];
685
686 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
687 (int) DISKPART(dev), (int) unit, (int) cmd));
688
689 /* Must be open for writes for these commands... */
690 switch (cmd) {
691 case DIOCSDINFO:
692 case DIOCWDINFO:
693 case DIOCWLABEL:
694 if ((flag & FWRITE) == 0)
695 return (EBADF);
696 }
697
698 /* Must be initialized for these... */
699 switch (cmd) {
700 case DIOCGDINFO:
701 case DIOCSDINFO:
702 case DIOCWDINFO:
703 case DIOCGPART:
704 case DIOCWLABEL:
705 case DIOCGDEFLABEL:
706 case RAIDFRAME_SHUTDOWN:
707 case RAIDFRAME_REWRITEPARITY:
708 case RAIDFRAME_GET_INFO:
709 case RAIDFRAME_RESET_ACCTOTALS:
710 case RAIDFRAME_GET_ACCTOTALS:
711 case RAIDFRAME_KEEP_ACCTOTALS:
712 case RAIDFRAME_GET_SIZE:
713 case RAIDFRAME_FAIL_DISK:
714 case RAIDFRAME_COPYBACK:
715 case RAIDFRAME_CHECK_RECON_STATUS:
716 case RAIDFRAME_GET_COMPONENT_LABEL:
717 case RAIDFRAME_SET_COMPONENT_LABEL:
718 case RAIDFRAME_ADD_HOT_SPARE:
719 case RAIDFRAME_REMOVE_HOT_SPARE:
720 case RAIDFRAME_INIT_LABELS:
721 case RAIDFRAME_REBUILD_IN_PLACE:
722 case RAIDFRAME_CHECK_PARITY:
723 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
724 case RAIDFRAME_CHECK_COPYBACK_STATUS:
725 if ((rs->sc_flags & RAIDF_INITED) == 0)
726 return (ENXIO);
727 }
728
729 switch (cmd) {
730
731 /* configure the system */
732 case RAIDFRAME_CONFIGURE:
733 /* copy-in the configuration information */
734 /* data points to a pointer to the configuration structure */
735
736 u_cfg = *((RF_Config_t **) data);
737 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
738 if (k_cfg == NULL) {
739 return (ENOMEM);
740 }
741 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
742 sizeof(RF_Config_t));
743 if (retcode) {
744 RF_Free(k_cfg, sizeof(RF_Config_t));
745 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
746 retcode));
747 return (retcode);
748 }
749 /* allocate a buffer for the layout-specific data, and copy it
750 * in */
751 if (k_cfg->layoutSpecificSize) {
752 if (k_cfg->layoutSpecificSize > 10000) {
753 /* sanity check */
754 RF_Free(k_cfg, sizeof(RF_Config_t));
755 return (EINVAL);
756 }
757 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
758 (u_char *));
759 if (specific_buf == NULL) {
760 RF_Free(k_cfg, sizeof(RF_Config_t));
761 return (ENOMEM);
762 }
763 retcode = copyin(k_cfg->layoutSpecific,
764 (caddr_t) specific_buf,
765 k_cfg->layoutSpecificSize);
766 if (retcode) {
767 RF_Free(k_cfg, sizeof(RF_Config_t));
768 RF_Free(specific_buf,
769 k_cfg->layoutSpecificSize);
770 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
771 retcode));
772 return (retcode);
773 }
774 } else
775 specific_buf = NULL;
776 k_cfg->layoutSpecific = specific_buf;
777
778 /* should do some kind of sanity check on the configuration.
779 * Store the sum of all the bytes in the last byte? */
780
781 /* configure the system */
782
783 raidPtr->raidid = unit;
784
785 retcode = rf_Configure(raidPtr, k_cfg);
786
787 if (retcode == 0) {
788
789 /* allow this many simultaneous IO's to
790 this RAID device */
791 raidPtr->openings = RAIDOUTSTANDING;
792
793 /* XXX should be moved to rf_Configure() */
794
795 raidPtr->copyback_in_progress = 0;
796 raidPtr->parity_rewrite_in_progress = 0;
797 raidPtr->recon_in_progress = 0;
798
799 retcode = raidinit(dev, raidPtr, unit);
800 rf_markalldirty( raidPtr );
801 }
802 /* free the buffers. No return code here. */
803 if (k_cfg->layoutSpecificSize) {
804 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
805 }
806 RF_Free(k_cfg, sizeof(RF_Config_t));
807
808 return (retcode);
809
810 /* shutdown the system */
811 case RAIDFRAME_SHUTDOWN:
812
813 if ((error = raidlock(rs)) != 0)
814 return (error);
815
816 /*
817 * If somebody has a partition mounted, we shouldn't
818 * shutdown.
819 */
820
821 part = DISKPART(dev);
822 pmask = (1 << part);
823 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
824 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
825 (rs->sc_dkdev.dk_copenmask & pmask))) {
826 raidunlock(rs);
827 return (EBUSY);
828 }
829
830 retcode = rf_Shutdown(raidPtr);
831
832 pool_destroy(&rs->sc_cbufpool);
833
834 /* It's no longer initialized... */
835 rs->sc_flags &= ~RAIDF_INITED;
836
837 /* Detach the disk. */
838 disk_detach(&rs->sc_dkdev);
839
840 raidunlock(rs);
841
842 return (retcode);
843 case RAIDFRAME_GET_COMPONENT_LABEL:
844 c_label_ptr = (RF_ComponentLabel_t **) data;
845 /* need to read the component label for the disk indicated
846 by row,column in component_label */
847
848 /* For practice, let's get it directly from disk, rather
849 than from the in-core copy */
850 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
851 (RF_ComponentLabel_t *));
852 if (component_label == NULL)
853 return (ENOMEM);
854
855 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
856
857 retcode = copyin( *c_label_ptr, component_label,
858 sizeof(RF_ComponentLabel_t));
859
860 if (retcode) {
861 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
862 return(retcode);
863 }
864
865 row = component_label->row;
866 column = component_label->column;
867
868 if ((row < 0) || (row >= raidPtr->numRow) ||
869 (column < 0) || (column >= raidPtr->numCol)) {
870 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
871 return(EINVAL);
872 }
873
874 raidread_component_label(
875 raidPtr->Disks[row][column].dev,
876 raidPtr->raid_cinfo[row][column].ci_vp,
877 component_label );
878
879 retcode = copyout((caddr_t) component_label,
880 (caddr_t) *c_label_ptr,
881 sizeof(RF_ComponentLabel_t));
882 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
883 return (retcode);
884
885 case RAIDFRAME_SET_COMPONENT_LABEL:
886 component_label = (RF_ComponentLabel_t *) data;
887
888 /* XXX check the label for valid stuff... */
889 /* Note that some things *should not* get modified --
890 the user should be re-initing the labels instead of
891 trying to patch things.
892 */
893
894 printf("Got component label:\n");
895 printf("Version: %d\n",component_label->version);
896 printf("Serial Number: %d\n",component_label->serial_number);
897 printf("Mod counter: %d\n",component_label->mod_counter);
898 printf("Row: %d\n", component_label->row);
899 printf("Column: %d\n", component_label->column);
900 printf("Num Rows: %d\n", component_label->num_rows);
901 printf("Num Columns: %d\n", component_label->num_columns);
902 printf("Clean: %d\n", component_label->clean);
903 printf("Status: %d\n", component_label->status);
904
905 row = component_label->row;
906 column = component_label->column;
907
908 if ((row < 0) || (row >= raidPtr->numRow) ||
909 (column < 0) || (column >= raidPtr->numCol)) {
910 return(EINVAL);
911 }
912
913 /* XXX this isn't allowed to do anything for now :-) */
914 #if 0
915 raidwrite_component_label(
916 raidPtr->Disks[row][column].dev,
917 raidPtr->raid_cinfo[row][column].ci_vp,
918 component_label );
919 #endif
920 return (0);
921
922 case RAIDFRAME_INIT_LABELS:
923 component_label = (RF_ComponentLabel_t *) data;
924 /*
925 we only want the serial number from
926 the above. We get all the rest of the information
927 from the config that was used to create this RAID
928 set.
929 */
930
931 raidPtr->serial_number = component_label->serial_number;
932 /* current version number */
933 ci_label.version = RF_COMPONENT_LABEL_VERSION;
934 ci_label.serial_number = component_label->serial_number;
935 ci_label.mod_counter = raidPtr->mod_counter;
936 ci_label.num_rows = raidPtr->numRow;
937 ci_label.num_columns = raidPtr->numCol;
938 ci_label.clean = RF_RAID_DIRTY; /* not clean */
939 ci_label.status = rf_ds_optimal; /* "It's good!" */
940
941 for(row=0;row<raidPtr->numRow;row++) {
942 ci_label.row = row;
943 for(column=0;column<raidPtr->numCol;column++) {
944 ci_label.column = column;
945 raidwrite_component_label(
946 raidPtr->Disks[row][column].dev,
947 raidPtr->raid_cinfo[row][column].ci_vp,
948 &ci_label );
949 }
950 }
951
952 return (retcode);
953
954 /* initialize all parity */
955 case RAIDFRAME_REWRITEPARITY:
956
957 if (raidPtr->Layout.map->faultsTolerated == 0) {
958 /* Parity for RAID 0 is trivially correct */
959 raidPtr->parity_good = RF_RAID_CLEAN;
960 return(0);
961 }
962
963 if (raidPtr->parity_rewrite_in_progress == 1) {
964 /* Re-write is already in progress! */
965 return(EINVAL);
966 }
967
968 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
969 rf_RewriteParityThread,
970 raidPtr,"raid_parity");
971 return (retcode);
972
973
974 case RAIDFRAME_ADD_HOT_SPARE:
975 sparePtr = (RF_SingleComponent_t *) data;
976 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
977 printf("Adding spare\n");
978 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
979 return(retcode);
980
981 case RAIDFRAME_REMOVE_HOT_SPARE:
982 return(retcode);
983
984 case RAIDFRAME_REBUILD_IN_PLACE:
985
986 if (raidPtr->Layout.map->faultsTolerated == 0) {
987 /* Can't do this on a RAID 0!! */
988 return(EINVAL);
989 }
990
991 if (raidPtr->recon_in_progress == 1) {
992 /* a reconstruct is already in progress! */
993 return(EINVAL);
994 }
995
996 componentPtr = (RF_SingleComponent_t *) data;
997 memcpy( &component, componentPtr,
998 sizeof(RF_SingleComponent_t));
999 row = component.row;
1000 column = component.column;
1001 printf("Rebuild: %d %d\n",row, column);
1002 if ((row < 0) || (row >= raidPtr->numRow) ||
1003 (column < 0) || (column >= raidPtr->numCol)) {
1004 return(EINVAL);
1005 }
1006
1007 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1008 if (rrcopy == NULL)
1009 return(ENOMEM);
1010
1011 rrcopy->raidPtr = (void *) raidPtr;
1012 rrcopy->row = row;
1013 rrcopy->col = column;
1014
1015 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1016 rf_ReconstructInPlaceThread,
1017 rrcopy,"raid_reconip");
1018 return(retcode);
1019
1020 case RAIDFRAME_GET_INFO:
1021 if (!raidPtr->valid)
1022 return (ENODEV);
1023 ucfgp = (RF_DeviceConfig_t **) data;
1024 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1025 (RF_DeviceConfig_t *));
1026 if (d_cfg == NULL)
1027 return (ENOMEM);
1028 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1029 d_cfg->rows = raidPtr->numRow;
1030 d_cfg->cols = raidPtr->numCol;
1031 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1032 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1033 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1034 return (ENOMEM);
1035 }
1036 d_cfg->nspares = raidPtr->numSpare;
1037 if (d_cfg->nspares >= RF_MAX_DISKS) {
1038 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1039 return (ENOMEM);
1040 }
1041 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1042 d = 0;
1043 for (i = 0; i < d_cfg->rows; i++) {
1044 for (j = 0; j < d_cfg->cols; j++) {
1045 d_cfg->devs[d] = raidPtr->Disks[i][j];
1046 d++;
1047 }
1048 }
1049 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1050 d_cfg->spares[i] = raidPtr->Disks[0][j];
1051 }
1052 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1053 sizeof(RF_DeviceConfig_t));
1054 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1055
1056 return (retcode);
1057
1058 case RAIDFRAME_CHECK_PARITY:
1059 *(int *) data = raidPtr->parity_good;
1060 return (0);
1061
1062 case RAIDFRAME_RESET_ACCTOTALS:
1063 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1064 return (0);
1065
1066 case RAIDFRAME_GET_ACCTOTALS:
1067 totals = (RF_AccTotals_t *) data;
1068 *totals = raidPtr->acc_totals;
1069 return (0);
1070
1071 case RAIDFRAME_KEEP_ACCTOTALS:
1072 raidPtr->keep_acc_totals = *(int *)data;
1073 return (0);
1074
1075 case RAIDFRAME_GET_SIZE:
1076 *(int *) data = raidPtr->totalSectors;
1077 return (0);
1078
1079 /* fail a disk & optionally start reconstruction */
1080 case RAIDFRAME_FAIL_DISK:
1081
1082 if (raidPtr->Layout.map->faultsTolerated == 0) {
1083 /* Can't do this on a RAID 0!! */
1084 return(EINVAL);
1085 }
1086
1087 rr = (struct rf_recon_req *) data;
1088
1089 if (rr->row < 0 || rr->row >= raidPtr->numRow
1090 || rr->col < 0 || rr->col >= raidPtr->numCol)
1091 return (EINVAL);
1092
1093 printf("raid%d: Failing the disk: row: %d col: %d\n",
1094 unit, rr->row, rr->col);
1095
1096 /* make a copy of the recon request so that we don't rely on
1097 * the user's buffer */
1098 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1099 if (rrcopy == NULL)
1100 return(ENOMEM);
1101 bcopy(rr, rrcopy, sizeof(*rr));
1102 rrcopy->raidPtr = (void *) raidPtr;
1103
1104 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1105 rf_ReconThread,
1106 rrcopy,"raid_recon");
1107 return (0);
1108
1109 /* invoke a copyback operation after recon on whatever disk
1110 * needs it, if any */
1111 case RAIDFRAME_COPYBACK:
1112
1113 if (raidPtr->Layout.map->faultsTolerated == 0) {
1114 /* This makes no sense on a RAID 0!! */
1115 return(EINVAL);
1116 }
1117
1118 if (raidPtr->copyback_in_progress == 1) {
1119 /* Copyback is already in progress! */
1120 return(EINVAL);
1121 }
1122
1123 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1124 rf_CopybackThread,
1125 raidPtr,"raid_copyback");
1126 return (retcode);
1127
1128 /* return the percentage completion of reconstruction */
1129 case RAIDFRAME_CHECK_RECON_STATUS:
1130 if (raidPtr->Layout.map->faultsTolerated == 0) {
1131 /* This makes no sense on a RAID 0 */
1132 return(EINVAL);
1133 }
1134 row = 0; /* XXX we only consider a single row... */
1135 if (raidPtr->status[row] != rf_rs_reconstructing)
1136 *(int *) data = 100;
1137 else
1138 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1139 return (0);
1140
1141 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1142 if (raidPtr->Layout.map->faultsTolerated == 0) {
1143 /* This makes no sense on a RAID 0 */
1144 return(EINVAL);
1145 }
1146 if (raidPtr->parity_rewrite_in_progress == 1) {
1147 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1148 } else {
1149 *(int *) data = 100;
1150 }
1151 return (0);
1152
1153 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1154 if (raidPtr->Layout.map->faultsTolerated == 0) {
1155 /* This makes no sense on a RAID 0 */
1156 return(EINVAL);
1157 }
1158 if (raidPtr->copyback_in_progress == 1) {
1159 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1160 raidPtr->Layout.numStripe;
1161 } else {
1162 *(int *) data = 100;
1163 }
1164 return (0);
1165
1166
1167 /* the sparetable daemon calls this to wait for the kernel to
1168 * need a spare table. this ioctl does not return until a
1169 * spare table is needed. XXX -- calling mpsleep here in the
1170 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1171 * -- I should either compute the spare table in the kernel,
1172 * or have a different -- XXX XXX -- interface (a different
1173 * character device) for delivering the table -- XXX */
1174 #if 0
1175 case RAIDFRAME_SPARET_WAIT:
1176 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1177 while (!rf_sparet_wait_queue)
1178 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1179 waitreq = rf_sparet_wait_queue;
1180 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1181 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1182
1183 /* structure assignment */
1184 *((RF_SparetWait_t *) data) = *waitreq;
1185
1186 RF_Free(waitreq, sizeof(*waitreq));
1187 return (0);
1188
1189 /* wakes up a process waiting on SPARET_WAIT and puts an error
1190 * code in it that will cause the dameon to exit */
1191 case RAIDFRAME_ABORT_SPARET_WAIT:
1192 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1193 waitreq->fcol = -1;
1194 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1195 waitreq->next = rf_sparet_wait_queue;
1196 rf_sparet_wait_queue = waitreq;
1197 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1198 wakeup(&rf_sparet_wait_queue);
1199 return (0);
1200
1201 /* used by the spare table daemon to deliver a spare table
1202 * into the kernel */
1203 case RAIDFRAME_SEND_SPARET:
1204
1205 /* install the spare table */
1206 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1207
1208 /* respond to the requestor. the return status of the spare
1209 * table installation is passed in the "fcol" field */
1210 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1211 waitreq->fcol = retcode;
1212 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1213 waitreq->next = rf_sparet_resp_queue;
1214 rf_sparet_resp_queue = waitreq;
1215 wakeup(&rf_sparet_resp_queue);
1216 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1217
1218 return (retcode);
1219 #endif
1220
1221 default:
1222 break; /* fall through to the os-specific code below */
1223
1224 }
1225
1226 if (!raidPtr->valid)
1227 return (EINVAL);
1228
1229 /*
1230 * Add support for "regular" device ioctls here.
1231 */
1232
1233 switch (cmd) {
1234 case DIOCGDINFO:
1235 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1236 break;
1237
1238 case DIOCGPART:
1239 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1240 ((struct partinfo *) data)->part =
1241 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1242 break;
1243
1244 case DIOCWDINFO:
1245 case DIOCSDINFO:
1246 if ((error = raidlock(rs)) != 0)
1247 return (error);
1248
1249 rs->sc_flags |= RAIDF_LABELLING;
1250
1251 error = setdisklabel(rs->sc_dkdev.dk_label,
1252 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1253 if (error == 0) {
1254 if (cmd == DIOCWDINFO)
1255 error = writedisklabel(RAIDLABELDEV(dev),
1256 raidstrategy, rs->sc_dkdev.dk_label,
1257 rs->sc_dkdev.dk_cpulabel);
1258 }
1259 rs->sc_flags &= ~RAIDF_LABELLING;
1260
1261 raidunlock(rs);
1262
1263 if (error)
1264 return (error);
1265 break;
1266
1267 case DIOCWLABEL:
1268 if (*(int *) data != 0)
1269 rs->sc_flags |= RAIDF_WLABEL;
1270 else
1271 rs->sc_flags &= ~RAIDF_WLABEL;
1272 break;
1273
1274 case DIOCGDEFLABEL:
1275 raidgetdefaultlabel(raidPtr, rs,
1276 (struct disklabel *) data);
1277 break;
1278
1279 default:
1280 retcode = ENOTTY;
1281 }
1282 return (retcode);
1283
1284 }
1285
1286
1287 /* raidinit -- complete the rest of the initialization for the
1288 RAIDframe device. */
1289
1290
1291 static int
1292 raidinit(dev, raidPtr, unit)
1293 dev_t dev;
1294 RF_Raid_t *raidPtr;
1295 int unit;
1296 {
1297 int retcode;
1298 struct raid_softc *rs;
1299
1300 retcode = 0;
1301
1302 rs = &raid_softc[unit];
1303 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1304 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1305
1306
1307 /* XXX should check return code first... */
1308 rs->sc_flags |= RAIDF_INITED;
1309
1310 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1311
1312 rs->sc_dkdev.dk_name = rs->sc_xname;
1313
1314 /* disk_attach actually creates space for the CPU disklabel, among
1315 * other things, so it's critical to call this *BEFORE* we try putzing
1316 * with disklabels. */
1317
1318 disk_attach(&rs->sc_dkdev);
1319
1320 /* XXX There may be a weird interaction here between this, and
1321 * protectedSectors, as used in RAIDframe. */
1322
1323 rs->sc_size = raidPtr->totalSectors;
1324 rs->sc_dev = dev;
1325
1326 return (retcode);
1327 }
1328
1329 /* wake up the daemon & tell it to get us a spare table
1330 * XXX
1331 * the entries in the queues should be tagged with the raidPtr
1332 * so that in the extremely rare case that two recons happen at once,
1333 * we know for which device were requesting a spare table
1334 * XXX
1335 *
1336 * XXX This code is not currently used. GO
1337 */
/*
 * rf_GetSpareTableFromDaemon -- hand a spare-table request to the
 * user-level sparetable daemon and block until a response arrives.
 *
 * req - the request to enqueue; ownership passes to the daemon path.
 *       The response dequeued below is a *different* structure, which
 *       is freed here after its fcol (status) field is extracted.
 *
 * Returns the fcol field of the daemon's response.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	/* publish the request and wake any daemon sleeping in SPARET_WAIT */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): unlike the Mach mpsleep this comment refers to,
	 * tsleep() does not release rf_sparet_wait_mutex while sleeping --
	 * confirm the mutex macros are no-ops here or this can deadlock. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* pop the response off the head of the response queue */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1363
1364 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1365 * bp & passes it down.
1366 * any calls originating in the kernel must use non-blocking I/O
1367 * do some extra sanity checking to return "appropriate" error values for
1368 * certain conditions (to make some standard utilities work)
1369 *
1370 * Formerly known as: rf_DoAccessKernel
1371 */
/*
 * Drain the per-unit buffer queue, dispatching each request to
 * RAIDframe via rf_DoAccess() for as long as "openings" permits.
 *
 * Locking: raidPtr->mutex protects the openings count.  It is held at
 * the top of each loop iteration and dropped while a request is being
 * unlinked and dispatched; the function exits with it released.
 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	struct buf *dp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Check to see if we're at the limit... */
	RF_LOCK_MUTEX(raidPtr->mutex);
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		dp = &rs->buf_queue;
		bp = dp->b_actf;
		if (bp == NULL) {
			/* nothing more to do */
			return;
		}

		/* update structures */
		/* unlink bp from the doubly-linked b_actf/b_actb queue;
		 * the tail pointer lives in rs->buf_queue.b_actb */
		dp = bp->b_actf;
		if (dp != NULL) {
			dp->b_actb = bp->b_actb;
		} else {
			rs->buf_queue.b_actb = bp->b_actb;
		}
		*bp->b_actb = dp;

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/* reject requests past the end of the array; the extra
		 * "sum < x" terms catch arithmetic wrap-around */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* transfers must be a whole number of sectors */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* claim one opening for this request */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */


		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				      do_async, raid_addr, num_blocks,
				      bp->b_un.b_addr, bp, NULL, NULL,
				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);


		/* re-take the mutex for the loop-condition test */
		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1492
1493
1494
1495
1496 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1497
/*
 * rf_DispatchKernelIO -- issue one RAIDframe disk-queue request to the
 * underlying component device.
 *
 * queue - the per-component disk queue (locked by the caller, per the
 *         comment above)
 * req   - the request; RF_IO_TYPE_NOP just bumps accounting and fires
 *         the completion callback, READ/WRITE build a kernel buf and
 *         hand it to VOP_STRATEGY().
 *
 * Always returns 0.
 */
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;
	int s;

	s=0;
	/* s = splbio();*/ /* want to test this */
	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	/* remember the owning queue so KernelWakeupFunc can find it */
	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	/* scrub any stale error state left in the incoming buffer */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* get a raidbuf from the per-unit pool to carry this I/O */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* complete immediately; no physical I/O is performed */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* set up the embedded buf; KernelWakeupFunc is the iodone
		 * callback and recovers req via raidbp */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must bump the vnode's output counter */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	/* splx(s); */ /* want to test this */
	return (0);
}
1612 /* this is the callback function associated with a I/O invoked from
1613 kernel code.
1614 */
/*
 * KernelWakeupFunc -- biodone callback for component I/O issued by
 * rf_DispatchKernelIO.  Propagates error/residual state from the
 * embedded raidbuf back to the original buf, marks a component failed
 * on I/O error, returns the raidbuf to the pool, and notifies the
 * RAIDframe queue/completion machinery.  Runs at splbio.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* vbp is really the raidbuf allocated in rf_DispatchKernelIO */
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;

	queue = (RF_DiskQueue_t *) req->queue;

	/* copy error state from the component I/O to the original buf */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* done with the raidbuf; hand it back to the pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	/* NOTE(review): disk_unbusy is only called when the transfer
	 * completed fully (b_resid == 0); partial/failed transfers leave
	 * the disk marked busy -- looks suspicious, confirm intent. */
	if (bp->b_resid == 0) {
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	}

	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);

	splx(s);
}
1695
1696
1697
1698 /*
1699 * initialize a buf structure for doing an I/O in the kernel.
1700 */
1701 static void
1702 InitBP(
1703 struct buf * bp,
1704 struct vnode * b_vp,
1705 unsigned rw_flag,
1706 dev_t dev,
1707 RF_SectorNum_t startSect,
1708 RF_SectorCount_t numSect,
1709 caddr_t buf,
1710 void (*cbFunc) (struct buf *),
1711 void *cbArg,
1712 int logBytesPerSector,
1713 struct proc * b_proc)
1714 {
1715 /* bp->b_flags = B_PHYS | rw_flag; */
1716 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1717 bp->b_bcount = numSect << logBytesPerSector;
1718 bp->b_bufsize = bp->b_bcount;
1719 bp->b_error = 0;
1720 bp->b_dev = dev;
1721 bp->b_un.b_addr = buf;
1722 bp->b_blkno = startSect;
1723 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1724 if (bp->b_bcount == 0) {
1725 panic("bp->b_bcount is zero in InitBP!!\n");
1726 }
1727 bp->b_proc = b_proc;
1728 bp->b_iodone = cbFunc;
1729 bp->b_vp = b_vp;
1730
1731 }
1732
1733 static void
1734 raidgetdefaultlabel(raidPtr, rs, lp)
1735 RF_Raid_t *raidPtr;
1736 struct raid_softc *rs;
1737 struct disklabel *lp;
1738 {
1739 db1_printf(("Building a default label...\n"));
1740 bzero(lp, sizeof(*lp));
1741
1742 /* fabricate a label... */
1743 lp->d_secperunit = raidPtr->totalSectors;
1744 lp->d_secsize = raidPtr->bytesPerSector;
1745 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1746 lp->d_ntracks = 1;
1747 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1748 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1749
1750 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1751 lp->d_type = DTYPE_RAID;
1752 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1753 lp->d_rpm = 3600;
1754 lp->d_interleave = 1;
1755 lp->d_flags = 0;
1756
1757 lp->d_partitions[RAW_PART].p_offset = 0;
1758 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1759 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1760 lp->d_npartitions = RAW_PART + 1;
1761
1762 lp->d_magic = DISKMAGIC;
1763 lp->d_magic2 = DISKMAGIC;
1764 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1765
1766 }
1767 /*
1768 * Read the disklabel from the raid device. If one is not present, fake one
1769 * up.
1770 */
/*
 * Read the disklabel from the raid device.  If one is not present, fake one
 * up.
 */
static void
raidgetdisklabel(dev)
	dev_t dev;
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a fabricated label; readdisklabel overwrites it if
	 * a real one is found on the device */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* no on-disk label; install the default one */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same componets are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
1824 /*
1825 * Take care of things one might want to take care of in the event
1826 * that a disklabel isn't present.
1827 */
1828 static void
1829 raidmakedisklabel(rs)
1830 struct raid_softc *rs;
1831 {
1832 struct disklabel *lp = rs->sc_dkdev.dk_label;
1833 db1_printf(("Making a label..\n"));
1834
1835 /*
1836 * For historical reasons, if there's no disklabel present
1837 * the raw partition must be marked FS_BSDFFS.
1838 */
1839
1840 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1841
1842 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1843
1844 lp->d_checksum = dkcksum(lp);
1845 }
1846 /*
1847 * Lookup the provided name in the filesystem. If the file exists,
1848 * is a valid block device, and isn't being used by anyone else,
1849 * set *vpp to the file's vnode.
1850 * You'll find the original of this in ccd.c
1851 */
1852 int
1853 raidlookup(path, p, vpp)
1854 char *path;
1855 struct proc *p;
1856 struct vnode **vpp; /* result */
1857 {
1858 struct nameidata nd;
1859 struct vnode *vp;
1860 struct vattr va;
1861 int error;
1862
1863 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1864 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1865 #ifdef DEBUG
1866 printf("RAIDframe: vn_open returned %d\n", error);
1867 #endif
1868 return (error);
1869 }
1870 vp = nd.ni_vp;
1871 if (vp->v_usecount > 1) {
1872 VOP_UNLOCK(vp, 0);
1873 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1874 return (EBUSY);
1875 }
1876 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1877 VOP_UNLOCK(vp, 0);
1878 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1879 return (error);
1880 }
1881 /* XXX: eventually we should handle VREG, too. */
1882 if (va.va_type != VBLK) {
1883 VOP_UNLOCK(vp, 0);
1884 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1885 return (ENOTBLK);
1886 }
1887 VOP_UNLOCK(vp, 0);
1888 *vpp = vp;
1889 return (0);
1890 }
1891 /*
1892 * Wait interruptibly for an exclusive lock.
1893 *
1894 * XXX
1895 * Several drivers do this; it should be abstracted and made MP-safe.
1896 * (Hmm... where have we seen this warning before :-> GO )
1897 */
1898 static int
1899 raidlock(rs)
1900 struct raid_softc *rs;
1901 {
1902 int error;
1903
1904 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1905 rs->sc_flags |= RAIDF_WANTED;
1906 if ((error =
1907 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1908 return (error);
1909 }
1910 rs->sc_flags |= RAIDF_LOCKED;
1911 return (0);
1912 }
1913 /*
1914 * Unlock and wake up any waiters.
1915 */
1916 static void
1917 raidunlock(rs)
1918 struct raid_softc *rs;
1919 {
1920
1921 rs->sc_flags &= ~RAIDF_LOCKED;
1922 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
1923 rs->sc_flags &= ~RAIDF_WANTED;
1924 wakeup(rs);
1925 }
1926 }
1927
1928
1929 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
1930 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
1931
1932 int
1933 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
1934 {
1935 RF_ComponentLabel_t component_label;
1936 raidread_component_label(dev, b_vp, &component_label);
1937 component_label.mod_counter = mod_counter;
1938 component_label.clean = RF_RAID_CLEAN;
1939 raidwrite_component_label(dev, b_vp, &component_label);
1940 return(0);
1941 }
1942
1943
1944 int
1945 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
1946 {
1947 RF_ComponentLabel_t component_label;
1948 raidread_component_label(dev, b_vp, &component_label);
1949 component_label.mod_counter = mod_counter;
1950 component_label.clean = RF_RAID_DIRTY;
1951 raidwrite_component_label(dev, b_vp, &component_label);
1952 return(0);
1953 }
1954
1955 /* ARGSUSED */
/*
 * raidread_component_label -- read the RAIDframe component label from a
 * component device via its raw strategy routine.
 *
 * dev             - component device
 * b_vp            - component vnode (unused here; see ARGSUSED)
 * component_label - filled in on success
 *
 * Returns the biowait() error code (0 on success).
 */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_resid is set in DEV_BSIZE units while b_bcount
	 * is in bytes -- looks inconsistent; confirm what the component's
	 * strategy routine expects. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* issue the read directly through the block device's strategy */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* throw the buffer away rather than caching it */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
2005 /* ARGSUSED */
/*
 * raidwrite_component_label -- write the RAIDframe component label to a
 * component device via its raw strategy routine.  The label area is
 * zero-padded to RF_COMPONENT_INFO_SIZE before the write.
 *
 * Returns the biowait() error code (0 on success).
 */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	/* NOTE(review): b_resid in DEV_BSIZE units vs b_bcount in bytes,
	 * same inconsistency as in raidread_component_label -- confirm. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero-fill, then copy the label into the front of the block */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* throw the buffer away rather than caching it */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2039
/*
 * rf_markalldirty -- bump the array's mod_counter and mark the on-disk
 * label of every non-failed component dirty.  Called while the array is
 * in use so that an unclean shutdown can be detected later.
 */
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			/* skip failed components entirely */
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
	/* NOTE(review): the disabled spare-handling block below uses r, i,
	 * j, srow, scol outside any loop that sets them (r is numRow here);
	 * it would need fixing before being enabled -- see the working
	 * version in rf_update_component_labels. */
#if 0
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
2134
2135
2136 void
2137 rf_update_component_labels( raidPtr )
2138 RF_Raid_t *raidPtr;
2139 {
2140 RF_ComponentLabel_t c_label;
2141 int sparecol;
2142 int r,c;
2143 int i,j;
2144 int srow, scol;
2145
2146 srow = -1;
2147 scol = -1;
2148
2149 /* XXX should do extra checks to make sure things really are clean,
2150 rather than blindly setting the clean bit... */
2151
2152 raidPtr->mod_counter++;
2153
2154 for (r = 0; r < raidPtr->numRow; r++) {
2155 for (c = 0; c < raidPtr->numCol; c++) {
2156 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2157 raidread_component_label(
2158 raidPtr->Disks[r][c].dev,
2159 raidPtr->raid_cinfo[r][c].ci_vp,
2160 &c_label);
2161 /* make sure status is noted */
2162 c_label.status = rf_ds_optimal;
2163 raidwrite_component_label(
2164 raidPtr->Disks[r][c].dev,
2165 raidPtr->raid_cinfo[r][c].ci_vp,
2166 &c_label);
2167 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2168 raidmarkclean(
2169 raidPtr->Disks[r][c].dev,
2170 raidPtr->raid_cinfo[r][c].ci_vp,
2171 raidPtr->mod_counter);
2172 }
2173 }
2174 /* else we don't touch it.. */
2175 #if 0
2176 else if (raidPtr->Disks[r][c].status !=
2177 rf_ds_failed) {
2178 raidread_component_label(
2179 raidPtr->Disks[r][c].dev,
2180 raidPtr->raid_cinfo[r][c].ci_vp,
2181 &c_label);
2182 /* make sure status is noted */
2183 c_label.status =
2184 raidPtr->Disks[r][c].status;
2185 raidwrite_component_label(
2186 raidPtr->Disks[r][c].dev,
2187 raidPtr->raid_cinfo[r][c].ci_vp,
2188 &c_label);
2189 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2190 raidmarkclean(
2191 raidPtr->Disks[r][c].dev,
2192 raidPtr->raid_cinfo[r][c].ci_vp,
2193 raidPtr->mod_counter);
2194 }
2195 }
2196 #endif
2197 }
2198 }
2199
2200 for( c = 0; c < raidPtr->numSpare ; c++) {
2201 sparecol = raidPtr->numCol + c;
2202 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2203 /*
2204
2205 we claim this disk is "optimal" if it's
2206 rf_ds_used_spare, as that means it should be
2207 directly substitutable for the disk it replaced.
2208 We note that too...
2209
2210 */
2211
2212 for(i=0;i<raidPtr->numRow;i++) {
2213 for(j=0;j<raidPtr->numCol;j++) {
2214 if ((raidPtr->Disks[i][j].spareRow ==
2215 0) &&
2216 (raidPtr->Disks[i][j].spareCol ==
2217 sparecol)) {
2218 srow = i;
2219 scol = j;
2220 break;
2221 }
2222 }
2223 }
2224
2225 raidread_component_label(
2226 raidPtr->Disks[0][sparecol].dev,
2227 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2228 &c_label);
2229 /* make sure status is noted */
2230 c_label.version = RF_COMPONENT_LABEL_VERSION;
2231 c_label.mod_counter = raidPtr->mod_counter;
2232 c_label.serial_number = raidPtr->serial_number;
2233 c_label.row = srow;
2234 c_label.column = scol;
2235 c_label.num_rows = raidPtr->numRow;
2236 c_label.num_columns = raidPtr->numCol;
2237 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2238 c_label.status = rf_ds_optimal;
2239 raidwrite_component_label(
2240 raidPtr->Disks[0][sparecol].dev,
2241 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2242 &c_label);
2243 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2244 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2245 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2246 raidPtr->mod_counter);
2247 }
2248 }
2249 }
2250 /* printf("Component labels updated\n"); */
2251 }
2252
2253 void
2254 rf_ReconThread(req)
2255 struct rf_recon_req *req;
2256 {
2257 int s;
2258 RF_Raid_t *raidPtr;
2259
2260 s = splbio();
2261 raidPtr = (RF_Raid_t *) req->raidPtr;
2262 raidPtr->recon_in_progress = 1;
2263
2264 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2265 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2266
2267 /* XXX get rid of this! we don't need it at all.. */
2268 RF_Free(req, sizeof(*req));
2269
2270 raidPtr->recon_in_progress = 0;
2271 splx(s);
2272
2273 /* That's all... */
2274 kthread_exit(0); /* does not return */
2275 }
2276
2277 void
2278 rf_RewriteParityThread(raidPtr)
2279 RF_Raid_t *raidPtr;
2280 {
2281 int retcode;
2282 int s;
2283
2284 raidPtr->parity_rewrite_in_progress = 1;
2285 s = splbio();
2286 retcode = rf_RewriteParity(raidPtr);
2287 splx(s);
2288 if (retcode) {
2289 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2290 } else {
2291 /* set the clean bit! If we shutdown correctly,
2292 the clean bit on each component label will get
2293 set */
2294 raidPtr->parity_good = RF_RAID_CLEAN;
2295 }
2296 raidPtr->parity_rewrite_in_progress = 0;
2297
2298 /* That's all... */
2299 kthread_exit(0); /* does not return */
2300 }
2301
2302
2303 void
2304 rf_CopybackThread(raidPtr)
2305 RF_Raid_t *raidPtr;
2306 {
2307 int s;
2308
2309 raidPtr->copyback_in_progress = 1;
2310 s = splbio();
2311 rf_CopybackReconstructedData(raidPtr);
2312 splx(s);
2313 raidPtr->copyback_in_progress = 0;
2314
2315 /* That's all... */
2316 kthread_exit(0); /* does not return */
2317 }
2318
2319
2320 void
2321 rf_ReconstructInPlaceThread(req)
2322 struct rf_recon_req *req;
2323 {
2324 int retcode;
2325 int s;
2326 RF_Raid_t *raidPtr;
2327
2328 s = splbio();
2329 raidPtr = req->raidPtr;
2330 raidPtr->recon_in_progress = 1;
2331 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2332 RF_Free(req, sizeof(*req));
2333 raidPtr->recon_in_progress = 0;
2334 splx(s);
2335
2336 /* That's all... */
2337 kthread_exit(0); /* does not return */
2338 }
2339