rf_netbsdkintf.c revision 1.46 1 /* $NetBSD: rf_netbsdkintf.c,v 1.46 2000/01/09 03:39:13 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_copyback.h"
141 #include "rf_dag.h"
142 #include "rf_dagflags.h"
143 #include "rf_diskqueue.h"
144 #include "rf_acctrace.h"
145 #include "rf_etimer.h"
146 #include "rf_general.h"
147 #include "rf_debugMem.h"
148 #include "rf_kintf.h"
149 #include "rf_options.h"
150 #include "rf_driver.h"
151 #include "rf_parityscan.h"
152 #include "rf_debugprint.h"
153 #include "rf_threadstuff.h"
154
int     rf_kdebug_level = 0;

#ifdef DEBUG
/*
 * Debug printf: emits only when rf_kdebug_level has been raised above 0.
 * The argument must be a doubly-parenthesized printf argument list, e.g.
 * db1_printf(("unit %d\n", unit)).  Wrapped in do { } while (0) so the
 * macro expands to a single statement: the old bare-`if' form broke
 * (dangling else / stray `;') when used as the body of an if/else.
 */
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
162
163 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
164
165 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
166
167 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
168 * spare table */
169 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
170 * installation process */
171
172 /* prototypes */
173 static void KernelWakeupFunc(struct buf * bp);
174 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
175 dev_t dev, RF_SectorNum_t startSect,
176 RF_SectorCount_t numSect, caddr_t buf,
177 void (*cbFunc) (struct buf *), void *cbArg,
178 int logBytesPerSector, struct proc * b_proc);
179 static int raidinit __P((dev_t, RF_Raid_t *, int));
180
181 void raidattach __P((int));
182 int raidsize __P((dev_t));
183 int raidopen __P((dev_t, int, int, struct proc *));
184 int raidclose __P((dev_t, int, int, struct proc *));
185 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
186 int raidwrite __P((dev_t, struct uio *, int));
187 int raidread __P((dev_t, struct uio *, int));
188 void raidstrategy __P((struct buf *));
189 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
190
191 /*
192 * Pilfered from ccd.c
193 */
194
/*
 * Per-component I/O buffer: wraps a fresh struct buf around a piece of
 * an original request so that component completions can be matched back
 * to the RAIDframe request that spawned them.  Allocated from the
 * per-unit pool via RAIDGETBUF/RAIDPUTBUF below.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! (a
				 * struct buf * handed to the completion
				 * path is cast back to a raidbuf *) */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};


/* Get/put a raidbuf from the unit's component buffer pool.  PR_NOWAIT:
 * allocation may fail and return NULL rather than sleep. */
#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
205
/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that... */

/*
 * Per-unit driver state, one entry per configurable raid unit (array
 * allocated in raidattach()).  Parallels the RF_Raid_t in raidPtrs[].
 */
struct raid_softc {
	int     sc_flags;	/* flags (RAIDF_* below) */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;	/* size of the raid device */
	dev_t   sc_dev;		/* our device.. */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Extract the raid unit number from a dev_t. */
#define	raidunit(x)	DISKUNIT(x)
/* Number of units allocated by raidattach(); 0 until attach runs. */
static int numraid = 0;
228
229 /*
230 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
231 * Be aware that large numbers can allow the driver to consume a lot of
232 * kernel memory, especially on writes, and in degraded mode reads.
233 *
234 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
235 * a single 64K write will typically require 64K for the old data,
236 * 64K for the old parity, and 64K for the new parity, for a total
237 * of 192K (if the parity buffer is not re-used immediately).
238 * Even it if is used immedately, that's still 128K, which when multiplied
239 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
240 *
241 * Now in degraded mode, for example, a 64K read on the above setup may
242 * require data reconstruction, which will require *all* of the 4 remaining
243 * disks to participate -- 4 * 32K/disk == 128K again.
244 */
245
246 #ifndef RAIDOUTSTANDING
247 #define RAIDOUTSTANDING 6
248 #endif
249
250 #define RAIDLABELDEV(dev) \
251 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
252
253 /* declared here, and made public, for the benefit of KVM stuff.. */
254 struct raid_softc *raid_softc;
255
256 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
257 struct disklabel *));
258 static void raidgetdisklabel __P((dev_t));
259 static void raidmakedisklabel __P((struct raid_softc *));
260
261 static int raidlock __P((struct raid_softc *));
262 static void raidunlock __P((struct raid_softc *));
263
264 static void rf_markalldirty __P((RF_Raid_t *));
265
266 void rf_ReconThread __P((struct rf_recon_req *));
267 /* XXX what I want is: */
268 /*void rf_ReconThread __P((RF_Raid_t *raidPtr)); */
269 void rf_RewriteParityThread __P((RF_Raid_t *raidPtr));
270 void rf_CopybackThread __P((RF_Raid_t *raidPtr));
271 void rf_ReconstructInPlaceThread __P((struct rf_recon_req *));
272
273 void
274 raidattach(num)
275 int num;
276 {
277 int raidID;
278 int i, rc;
279
280 #ifdef DEBUG
281 printf("raidattach: Asked for %d units\n", num);
282 #endif
283
284 if (num <= 0) {
285 #ifdef DIAGNOSTIC
286 panic("raidattach: count <= 0");
287 #endif
288 return;
289 }
290 /* This is where all the initialization stuff gets done. */
291
292 /* Make some space for requested number of units... */
293
294 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
295 if (raidPtrs == NULL) {
296 panic("raidPtrs is NULL!!\n");
297 }
298
299 rc = rf_mutex_init(&rf_sparet_wait_mutex);
300 if (rc) {
301 RF_PANIC();
302 }
303
304 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
305
306 for (i = 0; i < numraid; i++)
307 raidPtrs[i] = NULL;
308 rc = rf_BootRaidframe();
309 if (rc == 0)
310 printf("Kernelized RAIDframe activated\n");
311 else
312 panic("Serious error booting RAID!!\n");
313
314 /* put together some datastructures like the CCD device does.. This
315 * lets us lock the device and what-not when it gets opened. */
316
317 raid_softc = (struct raid_softc *)
318 malloc(num * sizeof(struct raid_softc),
319 M_RAIDFRAME, M_NOWAIT);
320 if (raid_softc == NULL) {
321 printf("WARNING: no memory for RAIDframe driver\n");
322 return;
323 }
324 numraid = num;
325 bzero(raid_softc, num * sizeof(struct raid_softc));
326
327 for (raidID = 0; raidID < num; raidID++) {
328 raid_softc[raidID].buf_queue.b_actf = NULL;
329 raid_softc[raidID].buf_queue.b_actb =
330 &raid_softc[raidID].buf_queue.b_actf;
331 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
332 (RF_Raid_t *));
333 if (raidPtrs[raidID] == NULL) {
334 printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
335 numraid = raidID;
336 return;
337 }
338 }
339 }
340
341
342 int
343 raidsize(dev)
344 dev_t dev;
345 {
346 struct raid_softc *rs;
347 struct disklabel *lp;
348 int part, unit, omask, size;
349
350 unit = raidunit(dev);
351 if (unit >= numraid)
352 return (-1);
353 rs = &raid_softc[unit];
354
355 if ((rs->sc_flags & RAIDF_INITED) == 0)
356 return (-1);
357
358 part = DISKPART(dev);
359 omask = rs->sc_dkdev.dk_openmask & (1 << part);
360 lp = rs->sc_dkdev.dk_label;
361
362 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
363 return (-1);
364
365 if (lp->d_partitions[part].p_fstype != FS_SWAP)
366 size = -1;
367 else
368 size = lp->d_partitions[part].p_size *
369 (lp->d_secsize / DEV_BSIZE);
370
371 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
372 return (-1);
373
374 return (size);
375
376 }
377
378 int
379 raiddump(dev, blkno, va, size)
380 dev_t dev;
381 daddr_t blkno;
382 caddr_t va;
383 size_t size;
384 {
385 /* Not implemented. */
386 return ENXIO;
387 }
/* ARGSUSED */
/*
 * raidopen: open a partition of a RAID unit.
 *
 * Under the per-unit lock: re-reads the disklabel on the first open of
 * a configured unit, verifies that a non-raw partition actually exists,
 * records the open in the char/block open masks, and on the very first
 * open marks all components dirty so an unclean shutdown is detectable.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t   dev;
	int     flags, fmt;
	struct proc *p;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against concurrent open/close/unconfigure. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));


	/* First open of a configured unit: refresh the in-core label. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	/* Recompute the combined open mask from the per-format masks. */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * raidclose: close a partition of a RAID unit.
 *
 * Under the per-unit lock, clears the partition's bit from the
 * appropriate (char/block) open mask.  On the last close of a
 * configured unit, rewrites the component labels so the set is
 * recorded as cleanly shut down.
 */
int
raidclose(dev, flags, fmt, p)
	dev_t   dev;
	int     flags, fmt;
	struct proc *p;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	int     error = 0;
	int     part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against concurrent open/unconfigure. */
	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */
		rf_update_component_labels( raidPtrs[unit] );
	}

	raidunlock(rs);
	return (0);

}
514
515 void
516 raidstrategy(bp)
517 register struct buf *bp;
518 {
519 register int s;
520
521 unsigned int raidID = raidunit(bp->b_dev);
522 RF_Raid_t *raidPtr;
523 struct raid_softc *rs = &raid_softc[raidID];
524 struct disklabel *lp;
525 struct buf *dp;
526 int wlabel;
527
528 if ((rs->sc_flags & RAIDF_INITED) ==0) {
529 bp->b_error = ENXIO;
530 bp->b_flags = B_ERROR;
531 bp->b_resid = bp->b_bcount;
532 biodone(bp);
533 return;
534 }
535 if (raidID >= numraid || !raidPtrs[raidID]) {
536 bp->b_error = ENODEV;
537 bp->b_flags |= B_ERROR;
538 bp->b_resid = bp->b_bcount;
539 biodone(bp);
540 return;
541 }
542 raidPtr = raidPtrs[raidID];
543 if (!raidPtr->valid) {
544 bp->b_error = ENODEV;
545 bp->b_flags |= B_ERROR;
546 bp->b_resid = bp->b_bcount;
547 biodone(bp);
548 return;
549 }
550 if (bp->b_bcount == 0) {
551 db1_printf(("b_bcount is zero..\n"));
552 biodone(bp);
553 return;
554 }
555 lp = rs->sc_dkdev.dk_label;
556
557 /*
558 * Do bounds checking and adjust transfer. If there's an
559 * error, the bounds check will flag that for us.
560 */
561
562 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
563 if (DISKPART(bp->b_dev) != RAW_PART)
564 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
565 db1_printf(("Bounds check failed!!:%d %d\n",
566 (int) bp->b_blkno, (int) wlabel));
567 biodone(bp);
568 return;
569 }
570 s = splbio();
571
572 bp->b_resid = 0;
573
574 /* stuff it onto our queue */
575
576 dp = &rs->buf_queue;
577 bp->b_actf = NULL;
578 bp->b_actb = dp->b_actb;
579 *dp->b_actb = bp;
580 dp->b_actb = &bp->b_actf;
581
582 raidstart(raidPtrs[raidID]);
583
584 splx(s);
585 }
586 /* ARGSUSED */
587 int
588 raidread(dev, uio, flags)
589 dev_t dev;
590 struct uio *uio;
591 int flags;
592 {
593 int unit = raidunit(dev);
594 struct raid_softc *rs;
595 int part;
596
597 if (unit >= numraid)
598 return (ENXIO);
599 rs = &raid_softc[unit];
600
601 if ((rs->sc_flags & RAIDF_INITED) == 0)
602 return (ENXIO);
603 part = DISKPART(dev);
604
605 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
606
607 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
608
609 }
610 /* ARGSUSED */
611 int
612 raidwrite(dev, uio, flags)
613 dev_t dev;
614 struct uio *uio;
615 int flags;
616 {
617 int unit = raidunit(dev);
618 struct raid_softc *rs;
619
620 if (unit >= numraid)
621 return (ENXIO);
622 rs = &raid_softc[unit];
623
624 if ((rs->sc_flags & RAIDF_INITED) == 0)
625 return (ENXIO);
626 db1_printf(("raidwrite\n"));
627 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
628
629 }
630
631 int
632 raidioctl(dev, cmd, data, flag, p)
633 dev_t dev;
634 u_long cmd;
635 caddr_t data;
636 int flag;
637 struct proc *p;
638 {
639 int unit = raidunit(dev);
640 int error = 0;
641 int part, pmask;
642 struct raid_softc *rs;
643 RF_Config_t *k_cfg, *u_cfg;
644 RF_Raid_t *raidPtr;
645 RF_AccTotals_t *totals;
646 RF_DeviceConfig_t *d_cfg, **ucfgp;
647 u_char *specific_buf;
648 int retcode = 0;
649 int row;
650 int column;
651 struct rf_recon_req *rrcopy, *rr;
652 RF_ComponentLabel_t *component_label;
653 RF_ComponentLabel_t ci_label;
654 RF_ComponentLabel_t **c_label_ptr;
655 RF_SingleComponent_t *sparePtr,*componentPtr;
656 RF_SingleComponent_t hot_spare;
657 RF_SingleComponent_t component;
658 int i, j, d;
659
660 if (unit >= numraid)
661 return (ENXIO);
662 rs = &raid_softc[unit];
663 raidPtr = raidPtrs[unit];
664
665 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
666 (int) DISKPART(dev), (int) unit, (int) cmd));
667
668 /* Must be open for writes for these commands... */
669 switch (cmd) {
670 case DIOCSDINFO:
671 case DIOCWDINFO:
672 case DIOCWLABEL:
673 if ((flag & FWRITE) == 0)
674 return (EBADF);
675 }
676
677 /* Must be initialized for these... */
678 switch (cmd) {
679 case DIOCGDINFO:
680 case DIOCSDINFO:
681 case DIOCWDINFO:
682 case DIOCGPART:
683 case DIOCWLABEL:
684 case DIOCGDEFLABEL:
685 case RAIDFRAME_SHUTDOWN:
686 case RAIDFRAME_REWRITEPARITY:
687 case RAIDFRAME_GET_INFO:
688 case RAIDFRAME_RESET_ACCTOTALS:
689 case RAIDFRAME_GET_ACCTOTALS:
690 case RAIDFRAME_KEEP_ACCTOTALS:
691 case RAIDFRAME_GET_SIZE:
692 case RAIDFRAME_FAIL_DISK:
693 case RAIDFRAME_COPYBACK:
694 case RAIDFRAME_CHECK_RECON_STATUS:
695 case RAIDFRAME_GET_COMPONENT_LABEL:
696 case RAIDFRAME_SET_COMPONENT_LABEL:
697 case RAIDFRAME_ADD_HOT_SPARE:
698 case RAIDFRAME_REMOVE_HOT_SPARE:
699 case RAIDFRAME_INIT_LABELS:
700 case RAIDFRAME_REBUILD_IN_PLACE:
701 case RAIDFRAME_CHECK_PARITY:
702 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
703 case RAIDFRAME_CHECK_COPYBACK_STATUS:
704 if ((rs->sc_flags & RAIDF_INITED) == 0)
705 return (ENXIO);
706 }
707
708 switch (cmd) {
709
710 /* configure the system */
711 case RAIDFRAME_CONFIGURE:
712 /* copy-in the configuration information */
713 /* data points to a pointer to the configuration structure */
714
715 u_cfg = *((RF_Config_t **) data);
716 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
717 if (k_cfg == NULL) {
718 return (ENOMEM);
719 }
720 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
721 sizeof(RF_Config_t));
722 if (retcode) {
723 RF_Free(k_cfg, sizeof(RF_Config_t));
724 db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
725 retcode));
726 return (retcode);
727 }
728 /* allocate a buffer for the layout-specific data, and copy it
729 * in */
730 if (k_cfg->layoutSpecificSize) {
731 if (k_cfg->layoutSpecificSize > 10000) {
732 /* sanity check */
733 RF_Free(k_cfg, sizeof(RF_Config_t));
734 return (EINVAL);
735 }
736 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
737 (u_char *));
738 if (specific_buf == NULL) {
739 RF_Free(k_cfg, sizeof(RF_Config_t));
740 return (ENOMEM);
741 }
742 retcode = copyin(k_cfg->layoutSpecific,
743 (caddr_t) specific_buf,
744 k_cfg->layoutSpecificSize);
745 if (retcode) {
746 RF_Free(k_cfg, sizeof(RF_Config_t));
747 RF_Free(specific_buf,
748 k_cfg->layoutSpecificSize);
749 db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
750 retcode));
751 return (retcode);
752 }
753 } else
754 specific_buf = NULL;
755 k_cfg->layoutSpecific = specific_buf;
756
757 /* should do some kind of sanity check on the configuration.
758 * Store the sum of all the bytes in the last byte? */
759
760 /* configure the system */
761
762 raidPtr->raidid = unit;
763
764 retcode = rf_Configure(raidPtr, k_cfg);
765
766 if (retcode == 0) {
767
768 /* allow this many simultaneous IO's to
769 this RAID device */
770 raidPtr->openings = RAIDOUTSTANDING;
771
772 /* XXX should be moved to rf_Configure() */
773
774 raidPtr->copyback_in_progress = 0;
775 raidPtr->parity_rewrite_in_progress = 0;
776 raidPtr->recon_in_progress = 0;
777
778 retcode = raidinit(dev, raidPtr, unit);
779 rf_markalldirty( raidPtr );
780 }
781 /* free the buffers. No return code here. */
782 if (k_cfg->layoutSpecificSize) {
783 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
784 }
785 RF_Free(k_cfg, sizeof(RF_Config_t));
786
787 return (retcode);
788
789 /* shutdown the system */
790 case RAIDFRAME_SHUTDOWN:
791
792 if ((error = raidlock(rs)) != 0)
793 return (error);
794
795 /*
796 * If somebody has a partition mounted, we shouldn't
797 * shutdown.
798 */
799
800 part = DISKPART(dev);
801 pmask = (1 << part);
802 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
803 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
804 (rs->sc_dkdev.dk_copenmask & pmask))) {
805 raidunlock(rs);
806 return (EBUSY);
807 }
808
809 retcode = rf_Shutdown(raidPtr);
810
811 pool_destroy(&rs->sc_cbufpool);
812
813 /* It's no longer initialized... */
814 rs->sc_flags &= ~RAIDF_INITED;
815
816 /* Detach the disk. */
817 disk_detach(&rs->sc_dkdev);
818
819 raidunlock(rs);
820
821 return (retcode);
822 case RAIDFRAME_GET_COMPONENT_LABEL:
823 c_label_ptr = (RF_ComponentLabel_t **) data;
824 /* need to read the component label for the disk indicated
825 by row,column in component_label */
826
827 /* For practice, let's get it directly fromdisk, rather
828 than from the in-core copy */
829 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
830 (RF_ComponentLabel_t *));
831 if (component_label == NULL)
832 return (ENOMEM);
833
834 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
835
836 retcode = copyin( *c_label_ptr, component_label,
837 sizeof(RF_ComponentLabel_t));
838
839 if (retcode) {
840 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
841 return(retcode);
842 }
843
844 row = component_label->row;
845 column = component_label->column;
846
847 if ((row < 0) || (row >= raidPtr->numRow) ||
848 (column < 0) || (column >= raidPtr->numCol)) {
849 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
850 return(EINVAL);
851 }
852
853 raidread_component_label(
854 raidPtr->Disks[row][column].dev,
855 raidPtr->raid_cinfo[row][column].ci_vp,
856 component_label );
857
858 retcode = copyout((caddr_t) component_label,
859 (caddr_t) *c_label_ptr,
860 sizeof(RF_ComponentLabel_t));
861 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
862 return (retcode);
863
864 case RAIDFRAME_SET_COMPONENT_LABEL:
865 component_label = (RF_ComponentLabel_t *) data;
866
867 /* XXX check the label for valid stuff... */
868 /* Note that some things *should not* get modified --
869 the user should be re-initing the labels instead of
870 trying to patch things.
871 */
872
873 printf("Got component label:\n");
874 printf("Version: %d\n",component_label->version);
875 printf("Serial Number: %d\n",component_label->serial_number);
876 printf("Mod counter: %d\n",component_label->mod_counter);
877 printf("Row: %d\n", component_label->row);
878 printf("Column: %d\n", component_label->column);
879 printf("Num Rows: %d\n", component_label->num_rows);
880 printf("Num Columns: %d\n", component_label->num_columns);
881 printf("Clean: %d\n", component_label->clean);
882 printf("Status: %d\n", component_label->status);
883
884 row = component_label->row;
885 column = component_label->column;
886
887 if ((row < 0) || (row >= raidPtr->numRow) ||
888 (column < 0) || (column >= raidPtr->numCol)) {
889 return(EINVAL);
890 }
891
892 /* XXX this isn't allowed to do anything for now :-) */
893 #if 0
894 raidwrite_component_label(
895 raidPtr->Disks[row][column].dev,
896 raidPtr->raid_cinfo[row][column].ci_vp,
897 component_label );
898 #endif
899 return (0);
900
901 case RAIDFRAME_INIT_LABELS:
902 component_label = (RF_ComponentLabel_t *) data;
903 /*
904 we only want the serial number from
905 the above. We get all the rest of the information
906 from the config that was used to create this RAID
907 set.
908 */
909
910 raidPtr->serial_number = component_label->serial_number;
911 /* current version number */
912 ci_label.version = RF_COMPONENT_LABEL_VERSION;
913 ci_label.serial_number = component_label->serial_number;
914 ci_label.mod_counter = raidPtr->mod_counter;
915 ci_label.num_rows = raidPtr->numRow;
916 ci_label.num_columns = raidPtr->numCol;
917 ci_label.clean = RF_RAID_DIRTY; /* not clean */
918 ci_label.status = rf_ds_optimal; /* "It's good!" */
919
920 for(row=0;row<raidPtr->numRow;row++) {
921 ci_label.row = row;
922 for(column=0;column<raidPtr->numCol;column++) {
923 ci_label.column = column;
924 raidwrite_component_label(
925 raidPtr->Disks[row][column].dev,
926 raidPtr->raid_cinfo[row][column].ci_vp,
927 &ci_label );
928 }
929 }
930
931 return (retcode);
932
933 /* initialize all parity */
934 case RAIDFRAME_REWRITEPARITY:
935
936 if (raidPtr->Layout.map->faultsTolerated == 0) {
937 /* Parity for RAID 0 is trivially correct */
938 raidPtr->parity_good = RF_RAID_CLEAN;
939 return(0);
940 }
941
942 if (raidPtr->parity_rewrite_in_progress == 1) {
943 /* Re-write is already in progress! */
944 return(EINVAL);
945 }
946
947 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
948 rf_RewriteParityThread,
949 raidPtr,"raid_parity");
950 return (retcode);
951
952
953 case RAIDFRAME_ADD_HOT_SPARE:
954 sparePtr = (RF_SingleComponent_t *) data;
955 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
956 printf("Adding spare\n");
957 retcode = rf_add_hot_spare(raidPtr, &hot_spare);
958 return(retcode);
959
960 case RAIDFRAME_REMOVE_HOT_SPARE:
961 return(retcode);
962
963 case RAIDFRAME_REBUILD_IN_PLACE:
964
965 if (raidPtr->Layout.map->faultsTolerated == 0) {
966 /* Can't do this on a RAID 0!! */
967 return(EINVAL);
968 }
969
970 if (raidPtr->recon_in_progress == 1) {
971 /* a reconstruct is already in progress! */
972 return(EINVAL);
973 }
974
975 componentPtr = (RF_SingleComponent_t *) data;
976 memcpy( &component, componentPtr,
977 sizeof(RF_SingleComponent_t));
978 row = component.row;
979 column = component.column;
980 printf("Rebuild: %d %d\n",row, column);
981 if ((row < 0) || (row >= raidPtr->numRow) ||
982 (column < 0) || (column >= raidPtr->numCol)) {
983 return(EINVAL);
984 }
985
986 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
987 if (rrcopy == NULL)
988 return(ENOMEM);
989
990 rrcopy->raidPtr = (void *) raidPtr;
991 rrcopy->row = row;
992 rrcopy->col = column;
993
994 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
995 rf_ReconstructInPlaceThread,
996 rrcopy,"raid_reconip");
997 return(retcode);
998
999 case RAIDFRAME_GET_INFO:
1000 if (!raidPtr->valid)
1001 return (ENODEV);
1002 ucfgp = (RF_DeviceConfig_t **) data;
1003 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1004 (RF_DeviceConfig_t *));
1005 if (d_cfg == NULL)
1006 return (ENOMEM);
1007 bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1008 d_cfg->rows = raidPtr->numRow;
1009 d_cfg->cols = raidPtr->numCol;
1010 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1011 if (d_cfg->ndevs >= RF_MAX_DISKS) {
1012 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1013 return (ENOMEM);
1014 }
1015 d_cfg->nspares = raidPtr->numSpare;
1016 if (d_cfg->nspares >= RF_MAX_DISKS) {
1017 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1018 return (ENOMEM);
1019 }
1020 d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1021 d = 0;
1022 for (i = 0; i < d_cfg->rows; i++) {
1023 for (j = 0; j < d_cfg->cols; j++) {
1024 d_cfg->devs[d] = raidPtr->Disks[i][j];
1025 d++;
1026 }
1027 }
1028 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1029 d_cfg->spares[i] = raidPtr->Disks[0][j];
1030 }
1031 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1032 sizeof(RF_DeviceConfig_t));
1033 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1034
1035 return (retcode);
1036
1037 case RAIDFRAME_CHECK_PARITY:
1038 *(int *) data = raidPtr->parity_good;
1039 return (0);
1040
1041 case RAIDFRAME_RESET_ACCTOTALS:
1042 bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1043 return (0);
1044
1045 case RAIDFRAME_GET_ACCTOTALS:
1046 totals = (RF_AccTotals_t *) data;
1047 *totals = raidPtr->acc_totals;
1048 return (0);
1049
1050 case RAIDFRAME_KEEP_ACCTOTALS:
1051 raidPtr->keep_acc_totals = *(int *)data;
1052 return (0);
1053
1054 case RAIDFRAME_GET_SIZE:
1055 *(int *) data = raidPtr->totalSectors;
1056 return (0);
1057
1058 /* fail a disk & optionally start reconstruction */
1059 case RAIDFRAME_FAIL_DISK:
1060
1061 if (raidPtr->Layout.map->faultsTolerated == 0) {
1062 /* Can't do this on a RAID 0!! */
1063 return(EINVAL);
1064 }
1065
1066 rr = (struct rf_recon_req *) data;
1067
1068 if (rr->row < 0 || rr->row >= raidPtr->numRow
1069 || rr->col < 0 || rr->col >= raidPtr->numCol)
1070 return (EINVAL);
1071
1072 printf("raid%d: Failing the disk: row: %d col: %d\n",
1073 unit, rr->row, rr->col);
1074
1075 /* make a copy of the recon request so that we don't rely on
1076 * the user's buffer */
1077 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1078 if (rrcopy == NULL)
1079 return(ENOMEM);
1080 bcopy(rr, rrcopy, sizeof(*rr));
1081 rrcopy->raidPtr = (void *) raidPtr;
1082
1083 retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1084 rf_ReconThread,
1085 rrcopy,"raid_recon");
1086 return (0);
1087
1088 /* invoke a copyback operation after recon on whatever disk
1089 * needs it, if any */
1090 case RAIDFRAME_COPYBACK:
1091
1092 if (raidPtr->Layout.map->faultsTolerated == 0) {
1093 /* This makes no sense on a RAID 0!! */
1094 return(EINVAL);
1095 }
1096
1097 if (raidPtr->copyback_in_progress == 1) {
1098 /* Copyback is already in progress! */
1099 return(EINVAL);
1100 }
1101
1102 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1103 rf_CopybackThread,
1104 raidPtr,"raid_copyback");
1105 return (retcode);
1106
1107 /* return the percentage completion of reconstruction */
1108 case RAIDFRAME_CHECK_RECON_STATUS:
1109 if (raidPtr->Layout.map->faultsTolerated == 0) {
1110 /* This makes no sense on a RAID 0 */
1111 return(EINVAL);
1112 }
1113 row = 0; /* XXX we only consider a single row... */
1114 if (raidPtr->status[row] != rf_rs_reconstructing)
1115 *(int *) data = 100;
1116 else
1117 *(int *) data = raidPtr->reconControl[row]->percentComplete;
1118 return (0);
1119
1120 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1121 if (raidPtr->Layout.map->faultsTolerated == 0) {
1122 /* This makes no sense on a RAID 0 */
1123 return(EINVAL);
1124 }
1125 if (raidPtr->parity_rewrite_in_progress == 1) {
1126 *(int *) data = 100 * raidPtr->parity_rewrite_stripes_done / raidPtr->Layout.numStripe;
1127 } else {
1128 *(int *) data = 100;
1129 }
1130 return (0);
1131
1132 case RAIDFRAME_CHECK_COPYBACK_STATUS:
1133 if (raidPtr->Layout.map->faultsTolerated == 0) {
1134 /* This makes no sense on a RAID 0 */
1135 return(EINVAL);
1136 }
1137 if (raidPtr->copyback_in_progress == 1) {
1138 *(int *) data = 100 * raidPtr->copyback_stripes_done /
1139 raidPtr->Layout.numStripe;
1140 } else {
1141 *(int *) data = 100;
1142 }
1143 return (0);
1144
1145
1146 /* the sparetable daemon calls this to wait for the kernel to
1147 * need a spare table. this ioctl does not return until a
1148 * spare table is needed. XXX -- calling mpsleep here in the
1149 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1150 * -- I should either compute the spare table in the kernel,
1151 * or have a different -- XXX XXX -- interface (a different
1152 * character device) for delivering the table -- XXX */
1153 #if 0
1154 case RAIDFRAME_SPARET_WAIT:
1155 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1156 while (!rf_sparet_wait_queue)
1157 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1158 waitreq = rf_sparet_wait_queue;
1159 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1160 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1161
1162 /* structure assignment */
1163 *((RF_SparetWait_t *) data) = *waitreq;
1164
1165 RF_Free(waitreq, sizeof(*waitreq));
1166 return (0);
1167
1168 /* wakes up a process waiting on SPARET_WAIT and puts an error
1169 * code in it that will cause the dameon to exit */
1170 case RAIDFRAME_ABORT_SPARET_WAIT:
1171 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1172 waitreq->fcol = -1;
1173 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1174 waitreq->next = rf_sparet_wait_queue;
1175 rf_sparet_wait_queue = waitreq;
1176 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1177 wakeup(&rf_sparet_wait_queue);
1178 return (0);
1179
1180 /* used by the spare table daemon to deliver a spare table
1181 * into the kernel */
1182 case RAIDFRAME_SEND_SPARET:
1183
1184 /* install the spare table */
1185 retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1186
1187 /* respond to the requestor. the return status of the spare
1188 * table installation is passed in the "fcol" field */
1189 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1190 waitreq->fcol = retcode;
1191 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1192 waitreq->next = rf_sparet_resp_queue;
1193 rf_sparet_resp_queue = waitreq;
1194 wakeup(&rf_sparet_resp_queue);
1195 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1196
1197 return (retcode);
1198 #endif
1199
1200 default:
1201 break; /* fall through to the os-specific code below */
1202
1203 }
1204
1205 if (!raidPtr->valid)
1206 return (EINVAL);
1207
1208 /*
1209 * Add support for "regular" device ioctls here.
1210 */
1211
1212 switch (cmd) {
1213 case DIOCGDINFO:
1214 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1215 break;
1216
1217 case DIOCGPART:
1218 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1219 ((struct partinfo *) data)->part =
1220 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1221 break;
1222
1223 case DIOCWDINFO:
1224 case DIOCSDINFO:
1225 if ((error = raidlock(rs)) != 0)
1226 return (error);
1227
1228 rs->sc_flags |= RAIDF_LABELLING;
1229
1230 error = setdisklabel(rs->sc_dkdev.dk_label,
1231 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1232 if (error == 0) {
1233 if (cmd == DIOCWDINFO)
1234 error = writedisklabel(RAIDLABELDEV(dev),
1235 raidstrategy, rs->sc_dkdev.dk_label,
1236 rs->sc_dkdev.dk_cpulabel);
1237 }
1238 rs->sc_flags &= ~RAIDF_LABELLING;
1239
1240 raidunlock(rs);
1241
1242 if (error)
1243 return (error);
1244 break;
1245
1246 case DIOCWLABEL:
1247 if (*(int *) data != 0)
1248 rs->sc_flags |= RAIDF_WLABEL;
1249 else
1250 rs->sc_flags &= ~RAIDF_WLABEL;
1251 break;
1252
1253 case DIOCGDEFLABEL:
1254 raidgetdefaultlabel(raidPtr, rs,
1255 (struct disklabel *) data);
1256 break;
1257
1258 default:
1259 retcode = ENOTTY;
1260 }
1261 return (retcode);
1262
1263 }
1264
1265
1266 /* raidinit -- complete the rest of the initialization for the
1267 RAIDframe device. */
1268
1269
1270 static int
1271 raidinit(dev, raidPtr, unit)
1272 dev_t dev;
1273 RF_Raid_t *raidPtr;
1274 int unit;
1275 {
1276 int retcode;
1277 struct raid_softc *rs;
1278
1279 retcode = 0;
1280
1281 rs = &raid_softc[unit];
1282 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1283 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1284
1285
1286 /* XXX should check return code first... */
1287 rs->sc_flags |= RAIDF_INITED;
1288
1289 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1290
1291 rs->sc_dkdev.dk_name = rs->sc_xname;
1292
1293 /* disk_attach actually creates space for the CPU disklabel, among
1294 * other things, so it's critical to call this *BEFORE* we try putzing
1295 * with disklabels. */
1296
1297 disk_attach(&rs->sc_dkdev);
1298
1299 /* XXX There may be a weird interaction here between this, and
1300 * protectedSectors, as used in RAIDframe. */
1301
1302 rs->sc_size = raidPtr->totalSectors;
1303 rs->sc_dev = dev;
1304
1305 return (retcode);
1306 }
1307
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int     retcode;

	/* Queue the request for the user-level daemon and wake up any
	 * process blocked in RAIDFRAME_SPARET_WAIT. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* NOTE(review): this comment predates the switch to tsleep():
	 * "mpsleep unlocks the mutex".  tsleep() is not told about the
	 * mutex at all here, so whether the mutex is actually released
	 * across the sleep depends on what RF_LOCK_MUTEX expands to —
	 * confirm before relying on it. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* Pop the daemon's response off the response queue.  Note that
	 * req is reused to point at the response entry from here on. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* the daemon passes the install status back in fcol */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1342
1343 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1344 * bp & passes it down.
1345 * any calls originating in the kernel must use non-blocking I/O
1346 * do some extra sanity checking to return "appropriate" error values for
1347 * certain conditions (to make some standard utilities work)
1348 *
1349 * Formerly known as: rf_DoAccessKernel
1350 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int     retcode;
	struct partition *pp;
	daddr_t blocknum;
	int     unit;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;
	struct buf *dp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Check to see if we're at the limit... */
	/* Loop invariant: raidPtr->mutex is held at the top of each
	 * iteration (taken here, re-taken at the bottom of the loop and
	 * on every continue path), and released while we work on a buf. */
	RF_LOCK_MUTEX(raidPtr->mutex);
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		dp = &rs->buf_queue;
		bp = dp->b_actf;
		if (bp == NULL) {
			/* nothing more to do */
			return;
		}

		/* update structures */
		/* unlink bp from the b_actf/b_actb doubly-linked queue,
		 * fixing up the tail pointer if bp was the last entry */
		dp = bp->b_actf;
		if (dp != NULL) {
			dp->b_actb = bp->b_actb;
		} else {
			rs->buf_queue.b_actb = bp->b_actb;
		}
		*bp->b_actb = dp;

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* sum = end of the request in sectors, rounded up by pb if
		 * the byte count isn't sector-aligned */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* NOTE: "1 ||" makes this condition always true; the
		 * db1_printf may still compile away depending on debug
		 * settings. */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/* reject requests that run off the end of the array; the
		 * extra (sum < x) comparisons catch arithmetic wraparound */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that aren't a multiple of the sector size */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* consume one opening for this I/O */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */


		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				      do_async, raid_addr, num_blocks,
				      bp->b_un.b_addr, bp, NULL, NULL,
				      RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);


		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1471
1472
1473
1474
1475 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1476
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int     unit;
	int     s;

	s = 0;
	/* s = splbio();*/ /* want to test this */
	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	/* remember which queue this request came from; KernelWakeupFunc
	 * reads it back out of the req */
	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	/* clear any stale error state left over in the caller's buf */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* grab a raidbuf from the per-unit pool to carry this request */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* (the doubled parens are harmless: printf of a
		 * parenthesized string expression) */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* complete immediately -- no actual disk I/O is issued */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* set up raidbp->rf_buf to do the transfer against the
		 * component's vnode/device; KernelWakeupFunc is the
		 * b_iodone callback */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must bump the vnode's output counter before the
		 * strategy call */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	/* splx(s); */ /* want to test this */
	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.  Runs at splbio; propagates the component I/O's error
   and residual count back into the original buf, updates the trace
   record, marks a component failed on I/O error, and completes the
   RAIDframe request. */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int     unit;
	register int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	/* bp is the original caller's buf; raidbp->rf_buf is the one we
	 * actually sent to the component */
	bp = raidbp->rf_obp;

	queue = (RF_DiskQueue_t *) req->queue;

	/* copy error status from the component I/O to the original buf */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* fold this I/O's elapsed time into the accounting record */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			       unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* return the raidbuf to the pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	}

	/* hand the result back to RAIDframe's queueing/DAG machinery */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);

	splx(s);
}
1674
1675
1676
1677 /*
1678 * initialize a buf structure for doing an I/O in the kernel.
1679 */
1680 static void
1681 InitBP(
1682 struct buf * bp,
1683 struct vnode * b_vp,
1684 unsigned rw_flag,
1685 dev_t dev,
1686 RF_SectorNum_t startSect,
1687 RF_SectorCount_t numSect,
1688 caddr_t buf,
1689 void (*cbFunc) (struct buf *),
1690 void *cbArg,
1691 int logBytesPerSector,
1692 struct proc * b_proc)
1693 {
1694 /* bp->b_flags = B_PHYS | rw_flag; */
1695 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1696 bp->b_bcount = numSect << logBytesPerSector;
1697 bp->b_bufsize = bp->b_bcount;
1698 bp->b_error = 0;
1699 bp->b_dev = dev;
1700 bp->b_un.b_addr = buf;
1701 bp->b_blkno = startSect;
1702 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1703 if (bp->b_bcount == 0) {
1704 panic("bp->b_bcount is zero in InitBP!!\n");
1705 }
1706 bp->b_proc = b_proc;
1707 bp->b_iodone = cbFunc;
1708 bp->b_vp = b_vp;
1709
1710 }
1711
1712 static void
1713 raidgetdefaultlabel(raidPtr, rs, lp)
1714 RF_Raid_t *raidPtr;
1715 struct raid_softc *rs;
1716 struct disklabel *lp;
1717 {
1718 db1_printf(("Building a default label...\n"));
1719 bzero(lp, sizeof(*lp));
1720
1721 /* fabricate a label... */
1722 lp->d_secperunit = raidPtr->totalSectors;
1723 lp->d_secsize = raidPtr->bytesPerSector;
1724 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1725 lp->d_ntracks = 1;
1726 lp->d_ncylinders = raidPtr->totalSectors /
1727 (lp->d_nsectors * lp->d_ntracks);
1728 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1729
1730 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1731 lp->d_type = DTYPE_RAID;
1732 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1733 lp->d_rpm = 3600;
1734 lp->d_interleave = 1;
1735 lp->d_flags = 0;
1736
1737 lp->d_partitions[RAW_PART].p_offset = 0;
1738 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1739 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1740 lp->d_npartitions = RAW_PART + 1;
1741
1742 lp->d_magic = DISKMAGIC;
1743 lp->d_magic2 = DISKMAGIC;
1744 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1745
1746 }
1747 /*
1748 * Read the disklabel from the raid device. If one is not present, fake one
1749 * up.
1750 */
static void
raidgetdisklabel(dev)
	dev_t   dev;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char   *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a fabricated label so that readdisklabel has sane
	 * geometry to work with */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* no label on disk -- synthesize one */
		raidmakedisklabel(rs);
	else {
		int     i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and an old disklabel may be used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		/* warn about (but keep) partitions that run past the end
		 * of the array */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
1804 /*
1805 * Take care of things one might want to take care of in the event
1806 * that a disklabel isn't present.
1807 */
1808 static void
1809 raidmakedisklabel(rs)
1810 struct raid_softc *rs;
1811 {
1812 struct disklabel *lp = rs->sc_dkdev.dk_label;
1813 db1_printf(("Making a label..\n"));
1814
1815 /*
1816 * For historical reasons, if there's no disklabel present
1817 * the raw partition must be marked FS_BSDFFS.
1818 */
1819
1820 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1821
1822 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1823
1824 lp->d_checksum = dkcksum(lp);
1825 }
1826 /*
1827 * Lookup the provided name in the filesystem. If the file exists,
1828 * is a valid block device, and isn't being used by anyone else,
1829 * set *vpp to the file's vnode.
1830 * You'll find the original of this in ccd.c
1831 */
1832 int
1833 raidlookup(path, p, vpp)
1834 char *path;
1835 struct proc *p;
1836 struct vnode **vpp; /* result */
1837 {
1838 struct nameidata nd;
1839 struct vnode *vp;
1840 struct vattr va;
1841 int error;
1842
1843 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1844 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1845 #ifdef DEBUG
1846 printf("RAIDframe: vn_open returned %d\n", error);
1847 #endif
1848 return (error);
1849 }
1850 vp = nd.ni_vp;
1851 if (vp->v_usecount > 1) {
1852 VOP_UNLOCK(vp, 0);
1853 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1854 return (EBUSY);
1855 }
1856 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1857 VOP_UNLOCK(vp, 0);
1858 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1859 return (error);
1860 }
1861 /* XXX: eventually we should handle VREG, too. */
1862 if (va.va_type != VBLK) {
1863 VOP_UNLOCK(vp, 0);
1864 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1865 return (ENOTBLK);
1866 }
1867 VOP_UNLOCK(vp, 0);
1868 *vpp = vp;
1869 return (0);
1870 }
1871 /*
1872 * Wait interruptibly for an exclusive lock.
1873 *
1874 * XXX
1875 * Several drivers do this; it should be abstracted and made MP-safe.
1876 * (Hmm... where have we seen this warning before :-> GO )
1877 */
1878 static int
1879 raidlock(rs)
1880 struct raid_softc *rs;
1881 {
1882 int error;
1883
1884 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1885 rs->sc_flags |= RAIDF_WANTED;
1886 if ((error =
1887 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1888 return (error);
1889 }
1890 rs->sc_flags |= RAIDF_LOCKED;
1891 return (0);
1892 }
1893 /*
1894 * Unlock and wake up any waiters.
1895 */
1896 static void
1897 raidunlock(rs)
1898 struct raid_softc *rs;
1899 {
1900
1901 rs->sc_flags &= ~RAIDF_LOCKED;
1902 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
1903 rs->sc_flags &= ~RAIDF_WANTED;
1904 wakeup(rs);
1905 }
1906 }
1907
1908
1909 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
1910 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
1911
1912 int
1913 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
1914 {
1915 RF_ComponentLabel_t component_label;
1916 raidread_component_label(dev, b_vp, &component_label);
1917 component_label.mod_counter = mod_counter;
1918 component_label.clean = RF_RAID_CLEAN;
1919 raidwrite_component_label(dev, b_vp, &component_label);
1920 return(0);
1921 }
1922
1923
1924 int
1925 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
1926 {
1927 RF_ComponentLabel_t component_label;
1928 raidread_component_label(dev, b_vp, &component_label);
1929 component_label.mod_counter = mod_counter;
1930 component_label.clean = RF_RAID_DIRTY;
1931 raidwrite_component_label(dev, b_vp, &component_label);
1932 return(0);
1933 }
1934
/* ARGSUSED */
/* Read the RAIDframe component label from the fixed offset on the
 * given component device into *component_label.  Returns 0 on success
 * or the error from biowait(). */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t   dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* issue the read directly via the component's block device
	 * strategy routine, then wait for it */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		/* the label lives at the front of the block */
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* mark the scratch buffer stale so it isn't cached */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
1985 /* ARGSUSED */
1986 int
1987 raidwrite_component_label(dev, b_vp, component_label)
1988 dev_t dev;
1989 struct vnode *b_vp;
1990 RF_ComponentLabel_t *component_label;
1991 {
1992 struct buf *bp;
1993 int error;
1994
1995 /* get a block of the appropriate size... */
1996 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
1997 bp->b_dev = dev;
1998
1999 /* get our ducks in a row for the write */
2000 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2001 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2002 bp->b_flags = B_BUSY | B_WRITE;
2003 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2004
2005 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2006
2007 memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));
2008
2009 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2010 error = biowait(bp);
2011 bp->b_flags = B_INVAL | B_AGE;
2012 brelse(bp);
2013 if (error) {
2014 printf("Failed to write RAID component info!\n");
2015 }
2016
2017 return(error);
2018 }
2019
/* Bump the array's mod counter and mark the component label of every
 * non-failed component dirty.  Called so that an unclean shutdown can
 * later be detected from the on-disk labels. */
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
	/* NOTE(review): the disabled block below references variables
	 * (sparecol, srow, scol, i, j) that are not declared in this
	 * function, and uses r after the loop above has finished -- it
	 * would need fixing before being re-enabled.  Compare the live
	 * spare-handling loop in rf_update_component_labels(). */
#if 0
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
2114
2115
/* Rewrite the component labels of all optimal components (and any
 * in-use spares), bumping the array's mod counter.  If parity is
 * known clean, the labels are additionally marked clean. */
void
rf_update_component_labels( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status = rf_ds_optimal;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
			/* else we don't touch it.. */
#if 0
			else if (raidPtr->Disks[r][c].status !=
				 rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
#endif
		}
	}

	/* spares live in row 0, in columns past numCol */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the (row,col) whose failed disk this spare
			 * is standing in for, so the label can record the
			 * spare's logical position */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			if (raidPtr->parity_good == RF_RAID_CLEAN) {
				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
					raidPtr->raid_cinfo[0][sparecol].ci_vp,
					       raidPtr->mod_counter);
			}
		}
	}
	/* printf("Component labels updated\n"); */
}
2232
2233 void
2234 rf_ReconThread(req)
2235 struct rf_recon_req *req;
2236 {
2237 int s;
2238 RF_Raid_t *raidPtr;
2239
2240 s = splbio();
2241 raidPtr = (RF_Raid_t *) req->raidPtr;
2242 raidPtr->recon_in_progress = 1;
2243
2244 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2245 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2246
2247 /* XXX get rid of this! we don't need it at all.. */
2248 RF_Free(req, sizeof(*req));
2249
2250 raidPtr->recon_in_progress = 0;
2251 splx(s);
2252
2253 /* That's all... */
2254 kthread_exit(0); /* does not return */
2255 }
2256
2257 void
2258 rf_RewriteParityThread(raidPtr)
2259 RF_Raid_t *raidPtr;
2260 {
2261 int retcode;
2262 int s;
2263
2264 raidPtr->parity_rewrite_in_progress = 1;
2265 s = splbio();
2266 retcode = rf_RewriteParity(raidPtr);
2267 splx(s);
2268 if (retcode) {
2269 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2270 } else {
2271 /* set the clean bit! If we shutdown correctly,
2272 the clean bit on each component label will get
2273 set */
2274 raidPtr->parity_good = RF_RAID_CLEAN;
2275 }
2276 raidPtr->parity_rewrite_in_progress = 0;
2277
2278 /* That's all... */
2279 kthread_exit(0); /* does not return */
2280 }
2281
2282
2283 void
2284 rf_CopybackThread(raidPtr)
2285 RF_Raid_t *raidPtr;
2286 {
2287 int s;
2288
2289 raidPtr->copyback_in_progress = 1;
2290 s = splbio();
2291 rf_CopybackReconstructedData(raidPtr);
2292 splx(s);
2293 raidPtr->copyback_in_progress = 0;
2294
2295 /* That's all... */
2296 kthread_exit(0); /* does not return */
2297 }
2298
2299
2300 void
2301 rf_ReconstructInPlaceThread(req)
2302 struct rf_recon_req *req;
2303 {
2304 int retcode;
2305 int s;
2306 RF_Raid_t *raidPtr;
2307
2308 s = splbio();
2309 raidPtr = req->raidPtr;
2310 raidPtr->recon_in_progress = 1;
2311 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2312 RF_Free(req, sizeof(*req));
2313 raidPtr->recon_in_progress = 0;
2314 splx(s);
2315
2316 /* That's all... */
2317 kthread_exit(0); /* does not return */
2318 }
2319