/*	$NetBSD: rf_netbsdkintf.c,v 1.16.2.9 2000/02/29 23:31:50 he Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
/* Run-time verbosity knob for the dbN_printf() macros below. */
int rf_kdebug_level = 0;

/* State of the in-kernel RAIDframe boot (set in raidattach()). */
#define RFK_BOOT_NONE 0		/* rf_BootRaidframe() not yet attempted */
#define RFK_BOOT_GOOD 1		/* core RAIDframe booted successfully */
#define RFK_BOOT_BAD 2		/* boot failed */
static int rf_kbooted = RFK_BOOT_NONE;

/*
 * Leveled debug printfs: dbN_printf() prints only when
 * rf_kdebug_level > N-1.  With DEBUG unset, only db0_printf() remains.
 */
#ifdef DEBUG
#define db0_printf(a) printf a
#define db_printf(a) if (rf_kdebug_level > 0) printf a
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#define db2_printf(a) if (rf_kdebug_level > 1) printf a
#define db3_printf(a) if (rf_kdebug_level > 2) printf a
#define db4_printf(a) if (rf_kdebug_level > 3) printf a
#define db5_printf(a) if (rf_kdebug_level > 4) printf a
#else				/* DEBUG */
#define db0_printf(a) printf a
#define db1_printf(a) { }
#define db2_printf(a) { }
#define db3_printf(a) { }
#define db4_printf(a) { }
#define db5_printf(a) { }
#endif				/* DEBUG */

static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

/* Protects the spare-table request/response queues below. */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */

static struct rf_recon_req *recon_queue = NULL;	/* used to communicate
						 * reconstruction
						 * requests */

/* Simple lock guarding recon_queue. */
decl_simple_lock_data(, recon_queue_mutex)
#define LOCK_RECON_Q_MUTEX()   simple_lock(&recon_queue_mutex)
#define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
195
/* prototypes */
/* I/O completion callback for bufs issued to individual components. */
static void KernelWakeupFunc(struct buf * bp);
/* Set up a struct buf describing an I/O to a single component. */
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
		   dev_t dev, RF_SectorNum_t startSect,
		   RF_SectorCount_t numSect, caddr_t buf,
		   void (*cbFunc) (struct buf *), void *cbArg,
		   int logBytesPerSector, struct proc * b_proc);

/* Queue-debugging printfs; active only when rf_queueDebug is set. */
#define Dprintf0(s)         if (rf_queueDebug) \
	rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf1(s,a)       if (rf_queueDebug) \
	rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf2(s,a,b)     if (rf_queueDebug) \
	rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf3(s,a,b,c)   if (rf_queueDebug) \
	rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)

/* Component-label clean/dirty bookkeeping (defined later in this file). */
int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);

void raidattach __P((int));
int raidsize __P((dev_t));

void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
static int raidinit __P((dev_t, RF_Raid_t *, int));

/* Block/character device entry points. */
int raidopen __P((dev_t, int, int, struct proc *));
int raidclose __P((dev_t, int, int, struct proc *));
int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
int raidwrite __P((dev_t, struct uio *, int));
int raidread __P((dev_t, struct uio *, int));
void raidstrategy __P((struct buf *));
int raiddump __P((dev_t, daddr_t, caddr_t, size_t));

/* Component-label I/O and maintenance. */
int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
void rf_update_component_labels( RF_Raid_t *);
/*
 * Pilfered from ccd.c
 */

/*
 * Per-component I/O bookkeeping: couples the buf issued to a component
 * with the original I/O buf and the RAIDframe request it belongs to.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int rf_flags;		/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};


/* Get/put a raidbuf from the unit's component buffer pool.
 * PR_NOWAIT: RAIDGETBUF may return NULL; callers must check. */
#define	RAIDGETBUF(rs)		pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp)	pool_put(&(rs)->sc_cbufpool, cbp)
248
/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that...
*/

/* Per-unit driver state; one entry per unit in the raid_softc[] array. */
struct raid_softc {
	int sc_flags;		/* flags */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	dev_t sc_dev;		/* our device.. */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Map a dev_t to its RAID unit number. */
#define	raidunit(x)	DISKUNIT(x)
static int numraid = 0;		/* units attached; set in raidattach() */

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes...
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   10
#endif

/* dev_t of the raw partition of a unit -- where the disklabel lives. */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

/* Disklabel helpers (defined later in this file). */
static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
				     struct disklabel *));
static void raidgetdisklabel __P((dev_t));
static void raidmakedisklabel __P((struct raid_softc *));

/* Per-unit lock serializing open/close/configure/unconfigure. */
static int raidlock __P((struct raid_softc *));
static void raidunlock __P((struct raid_softc *));
int raidlookup __P((char *, struct proc * p, struct vnode **));

static void rf_markalldirty __P((RF_Raid_t *));
298
299 void
300 raidattach(num)
301 int num;
302 {
303 int raidID;
304 int i, rc;
305
306 #ifdef DEBUG
307 printf("raidattach: Asked for %d units\n", num);
308 #endif
309
310 if (num <= 0) {
311 #ifdef DIAGNOSTIC
312 panic("raidattach: count <= 0");
313 #endif
314 return;
315 }
316 /* This is where all the initialization stuff gets done. */
317
318 /* Make some space for requested number of units... */
319
320 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
321 if (raidPtrs == NULL) {
322 panic("raidPtrs is NULL!!\n");
323 }
324
325 rc = rf_mutex_init(&rf_sparet_wait_mutex);
326 if (rc) {
327 RF_PANIC();
328 }
329
330 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
331 recon_queue = NULL;
332
333 for (i = 0; i < numraid; i++)
334 raidPtrs[i] = NULL;
335 rc = rf_BootRaidframe();
336 if (rc == 0)
337 printf("Kernelized RAIDframe activated\n");
338 else
339 panic("Serious error booting RAID!!\n");
340
341 rf_kbooted = RFK_BOOT_GOOD;
342
343 /* put together some datastructures like the CCD device does.. This
344 * lets us lock the device and what-not when it gets opened. */
345
346 raid_softc = (struct raid_softc *)
347 malloc(num * sizeof(struct raid_softc),
348 M_RAIDFRAME, M_NOWAIT);
349 if (raid_softc == NULL) {
350 printf("WARNING: no memory for RAIDframe driver\n");
351 return;
352 }
353 numraid = num;
354 bzero(raid_softc, num * sizeof(struct raid_softc));
355
356 for (raidID = 0; raidID < num; raidID++) {
357 raid_softc[raidID].buf_queue.b_actf = NULL;
358 raid_softc[raidID].buf_queue.b_actb =
359 &raid_softc[raidID].buf_queue.b_actf;
360 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
361 (RF_Raid_t *));
362 if (raidPtrs[raidID] == NULL) {
363 printf("raidPtrs[%d] is NULL\n", raidID);
364 }
365 }
366 }
367
368
369 int
370 raidsize(dev)
371 dev_t dev;
372 {
373 struct raid_softc *rs;
374 struct disklabel *lp;
375 int part, unit, omask, size;
376
377 unit = raidunit(dev);
378 if (unit >= numraid)
379 return (-1);
380 rs = &raid_softc[unit];
381
382 if ((rs->sc_flags & RAIDF_INITED) == 0)
383 return (-1);
384
385 part = DISKPART(dev);
386 omask = rs->sc_dkdev.dk_openmask & (1 << part);
387 lp = rs->sc_dkdev.dk_label;
388
389 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
390 return (-1);
391
392 if (lp->d_partitions[part].p_fstype != FS_SWAP)
393 size = -1;
394 else
395 size = lp->d_partitions[part].p_size *
396 (lp->d_secsize / DEV_BSIZE);
397
398 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
399 return (-1);
400
401 return (size);
402
403 }
404
405 int
406 raiddump(dev, blkno, va, size)
407 dev_t dev;
408 daddr_t blkno;
409 caddr_t va;
410 size_t size;
411 {
412 /* Not implemented. */
413 return ENXIO;
414 }
/* ARGSUSED */
/*
 * raidopen: open entry point for both block and character devices.
 * Validates the unit and partition, records the open in the disk(9)
 * open masks, and on the first open of a configured unit (re)reads the
 * disklabel and marks all components dirty.  Returns 0 or an errno.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against concurrent open/close/configure. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));


	/* First open of a configured unit: refresh the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * raidclose: close entry point.  Clears this partition's bit in the
 * appropriate open mask, and when the last partition of a configured
 * unit closes, updates the component labels (marking the set clean).
 * Always returns 0 once the unit lock is obtained.
 */
int
raidclose(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against concurrent open/configure/unconfigure. */
	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */
		rf_update_component_labels( raidPtrs[unit] );
	}

	raidunlock(rs);
	return (0);

}
541
542 void
543 raidstrategy(bp)
544 register struct buf *bp;
545 {
546 register int s;
547
548 unsigned int raidID = raidunit(bp->b_dev);
549 RF_Raid_t *raidPtr;
550 struct raid_softc *rs = &raid_softc[raidID];
551 struct disklabel *lp;
552 struct buf *dp;
553 int wlabel;
554
555 #if 0
556 db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
557 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
558 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
559 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
560 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
561
562 if (bp->b_flags & B_READ)
563 db1_printf(("READ\n"));
564 else
565 db1_printf(("WRITE\n"));
566 #endif
567 if (rf_kbooted != RFK_BOOT_GOOD)
568 return;
569 if (raidID >= numraid || !raidPtrs[raidID]) {
570 bp->b_error = ENODEV;
571 bp->b_flags |= B_ERROR;
572 bp->b_resid = bp->b_bcount;
573 biodone(bp);
574 return;
575 }
576 raidPtr = raidPtrs[raidID];
577 if (!raidPtr->valid) {
578 bp->b_error = ENODEV;
579 bp->b_flags |= B_ERROR;
580 bp->b_resid = bp->b_bcount;
581 biodone(bp);
582 return;
583 }
584 if (bp->b_bcount == 0) {
585 db1_printf(("b_bcount is zero..\n"));
586 biodone(bp);
587 return;
588 }
589 lp = rs->sc_dkdev.dk_label;
590
591 /*
592 * Do bounds checking and adjust transfer. If there's an
593 * error, the bounds check will flag that for us.
594 */
595
596 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
597 if (DISKPART(bp->b_dev) != RAW_PART)
598 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
599 db1_printf(("Bounds check failed!!:%d %d\n",
600 (int) bp->b_blkno, (int) wlabel));
601 biodone(bp);
602 return;
603 }
604 s = splbio();
605
606 bp->b_resid = 0;
607
608 /* stuff it onto our queue */
609
610 dp = &rs->buf_queue;
611 bp->b_actf = NULL;
612 bp->b_actb = dp->b_actb;
613 *dp->b_actb = bp;
614 dp->b_actb = &bp->b_actf;
615
616 raidstart(raidPtrs[raidID]);
617
618 splx(s);
619 }
620 /* ARGSUSED */
621 int
622 raidread(dev, uio, flags)
623 dev_t dev;
624 struct uio *uio;
625 int flags;
626 {
627 int unit = raidunit(dev);
628 struct raid_softc *rs;
629 int part;
630
631 if (unit >= numraid)
632 return (ENXIO);
633 rs = &raid_softc[unit];
634
635 if ((rs->sc_flags & RAIDF_INITED) == 0)
636 return (ENXIO);
637 part = DISKPART(dev);
638
639 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
640
641 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
642
643 }
644 /* ARGSUSED */
645 int
646 raidwrite(dev, uio, flags)
647 dev_t dev;
648 struct uio *uio;
649 int flags;
650 {
651 int unit = raidunit(dev);
652 struct raid_softc *rs;
653
654 if (unit >= numraid)
655 return (ENXIO);
656 rs = &raid_softc[unit];
657
658 if ((rs->sc_flags & RAIDF_INITED) == 0)
659 return (ENXIO);
660 db1_printf(("raidwrite\n"));
661 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
662
663 }
664
665 int
666 raidioctl(dev, cmd, data, flag, p)
667 dev_t dev;
668 u_long cmd;
669 caddr_t data;
670 int flag;
671 struct proc *p;
672 {
673 int unit = raidunit(dev);
674 int error = 0;
675 int part, pmask;
676 struct raid_softc *rs;
677 #if 0
678 int r, c;
679 #endif
680 /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
681
682 /* struct ccdbuf *cbp; */
683 /* struct raidbuf *raidbp; */
684 RF_Config_t *k_cfg, *u_cfg;
685 u_char *specific_buf;
686 int retcode = 0;
687 int row;
688 int column;
689 int s;
690 struct rf_recon_req *rrcopy, *rr;
691 RF_ComponentLabel_t *component_label;
692 RF_ComponentLabel_t ci_label;
693 RF_ComponentLabel_t **c_label_ptr;
694 RF_SingleComponent_t *sparePtr,*componentPtr;
695 RF_SingleComponent_t hot_spare;
696 RF_SingleComponent_t component;
697
698 if (unit >= numraid)
699 return (ENXIO);
700 rs = &raid_softc[unit];
701
702 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
703 (int) DISKPART(dev), (int) unit, (int) cmd));
704
705 /* Must be open for writes for these commands... */
706 switch (cmd) {
707 case DIOCSDINFO:
708 case DIOCWDINFO:
709 case DIOCWLABEL:
710 if ((flag & FWRITE) == 0)
711 return (EBADF);
712 }
713
714 /* Must be initialized for these... */
715 switch (cmd) {
716 case DIOCGDINFO:
717 case DIOCSDINFO:
718 case DIOCWDINFO:
719 case DIOCGPART:
720 case DIOCWLABEL:
721 case DIOCGDEFLABEL:
722 case RAIDFRAME_SHUTDOWN:
723 case RAIDFRAME_REWRITEPARITY:
724 case RAIDFRAME_GET_INFO:
725 case RAIDFRAME_RESET_ACCTOTALS:
726 case RAIDFRAME_GET_ACCTOTALS:
727 case RAIDFRAME_KEEP_ACCTOTALS:
728 case RAIDFRAME_GET_SIZE:
729 case RAIDFRAME_FAIL_DISK:
730 case RAIDFRAME_COPYBACK:
731 case RAIDFRAME_CHECKRECON:
732 case RAIDFRAME_GET_COMPONENT_LABEL:
733 case RAIDFRAME_SET_COMPONENT_LABEL:
734 case RAIDFRAME_ADD_HOT_SPARE:
735 case RAIDFRAME_REMOVE_HOT_SPARE:
736 case RAIDFRAME_INIT_LABELS:
737 case RAIDFRAME_REBUILD_IN_PLACE:
738 if ((rs->sc_flags & RAIDF_INITED) == 0)
739 return (ENXIO);
740 }
741
742 switch (cmd) {
743
744
745 /* configure the system */
746 case RAIDFRAME_CONFIGURE:
747
748 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
749 /* copy-in the configuration information */
750 /* data points to a pointer to the configuration structure */
751 u_cfg = *((RF_Config_t **) data);
752 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
753 if (k_cfg == NULL) {
754 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
755 return (ENOMEM);
756 }
757 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
758 sizeof(RF_Config_t));
759 if (retcode) {
760 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
761 retcode));
762 return (retcode);
763 }
764 /* allocate a buffer for the layout-specific data, and copy it
765 * in */
766 if (k_cfg->layoutSpecificSize) {
767 if (k_cfg->layoutSpecificSize > 10000) {
768 /* sanity check */
769 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
770 return (EINVAL);
771 }
772 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
773 (u_char *));
774 if (specific_buf == NULL) {
775 RF_Free(k_cfg, sizeof(RF_Config_t));
776 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
777 return (ENOMEM);
778 }
779 retcode = copyin(k_cfg->layoutSpecific,
780 (caddr_t) specific_buf,
781 k_cfg->layoutSpecificSize);
782 if (retcode) {
783 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
784 retcode));
785 return (retcode);
786 }
787 } else
788 specific_buf = NULL;
789 k_cfg->layoutSpecific = specific_buf;
790
791 /* should do some kind of sanity check on the configuration.
792 * Store the sum of all the bytes in the last byte? */
793
794 #if 0
795 db1_printf(("Considering configuring the system.:%d 0x%x\n",
796 unit, p));
797 #endif
798
799 /* We need the pointer to this a little deeper, so stash it
800 * here... */
801
802 raidPtrs[unit]->proc = p;
803
804 /* configure the system */
805
806 raidPtrs[unit]->raidid = unit;
807
808 retcode = rf_Configure(raidPtrs[unit], k_cfg);
809
810 /* allow this many simultaneous IO's to this RAID device */
811 raidPtrs[unit]->openings = RAIDOUTSTANDING;
812
813 if (retcode == 0) {
814 retcode = raidinit(dev, raidPtrs[unit], unit);
815 rf_markalldirty( raidPtrs[unit] );
816 }
817 /* free the buffers. No return code here. */
818 if (k_cfg->layoutSpecificSize) {
819 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
820 }
821 RF_Free(k_cfg, sizeof(RF_Config_t));
822
823 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
824 retcode));
825
826 return (retcode);
827
828 /* shutdown the system */
829 case RAIDFRAME_SHUTDOWN:
830
831 if ((error = raidlock(rs)) != 0)
832 return (error);
833
834 /*
835 * If somebody has a partition mounted, we shouldn't
836 * shutdown.
837 */
838
839 part = DISKPART(dev);
840 pmask = (1 << part);
841 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
842 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
843 (rs->sc_dkdev.dk_copenmask & pmask))) {
844 raidunlock(rs);
845 return (EBUSY);
846 }
847
848 if (rf_debugKernelAccess) {
849 printf("call shutdown\n");
850 }
851 raidPtrs[unit]->proc = p; /* XXX necessary evil */
852
853 retcode = rf_Shutdown(raidPtrs[unit]);
854
855 db1_printf(("Done main shutdown\n"));
856
857 pool_destroy(&rs->sc_cbufpool);
858 db1_printf(("Done freeing component buffer freelist\n"));
859
860 /* It's no longer initialized... */
861 rs->sc_flags &= ~RAIDF_INITED;
862
863 /* Detach the disk. */
864 disk_detach(&rs->sc_dkdev);
865
866 raidunlock(rs);
867
868 return (retcode);
869 case RAIDFRAME_GET_COMPONENT_LABEL:
870 c_label_ptr = (RF_ComponentLabel_t **) data;
871 /* need to read the component label for the disk indicated
872 by row,column in component_label
873 XXX need to sanity check these values!!!
874 */
875
876 /* For practice, let's get it directly fromdisk, rather
877 than from the in-core copy */
878 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
879 (RF_ComponentLabel_t *));
880 if (component_label == NULL)
881 return (ENOMEM);
882
883 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
884
885 retcode = copyin( *c_label_ptr, component_label,
886 sizeof(RF_ComponentLabel_t));
887
888 if (retcode) {
889 return(retcode);
890 }
891
892 row = component_label->row;
893 column = component_label->column;
894
895 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
896 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
897 return(EINVAL);
898 }
899
900 raidread_component_label(
901 raidPtrs[unit]->Disks[row][column].dev,
902 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
903 component_label );
904
905 retcode = copyout((caddr_t) component_label,
906 (caddr_t) *c_label_ptr,
907 sizeof(RF_ComponentLabel_t));
908 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
909 return (retcode);
910
911 case RAIDFRAME_SET_COMPONENT_LABEL:
912 component_label = (RF_ComponentLabel_t *) data;
913
914 /* XXX check the label for valid stuff... */
915 /* Note that some things *should not* get modified --
916 the user should be re-initing the labels instead of
917 trying to patch things.
918 */
919
920 printf("Got component label:\n");
921 printf("Version: %d\n",component_label->version);
922 printf("Serial Number: %d\n",component_label->serial_number);
923 printf("Mod counter: %d\n",component_label->mod_counter);
924 printf("Row: %d\n", component_label->row);
925 printf("Column: %d\n", component_label->column);
926 printf("Num Rows: %d\n", component_label->num_rows);
927 printf("Num Columns: %d\n", component_label->num_columns);
928 printf("Clean: %d\n", component_label->clean);
929 printf("Status: %d\n", component_label->status);
930
931 row = component_label->row;
932 column = component_label->column;
933
934 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
935 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
936 return(EINVAL);
937 }
938
939 /* XXX this isn't allowed to do anything for now :-) */
940 #if 0
941 raidwrite_component_label(
942 raidPtrs[unit]->Disks[row][column].dev,
943 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
944 component_label );
945 #endif
946 return (0);
947
948 case RAIDFRAME_INIT_LABELS:
949 component_label = (RF_ComponentLabel_t *) data;
950 /*
951 we only want the serial number from
952 the above. We get all the rest of the information
953 from the config that was used to create this RAID
954 set.
955 */
956
957 raidPtrs[unit]->serial_number = component_label->serial_number;
958 /* current version number */
959 ci_label.version = RF_COMPONENT_LABEL_VERSION;
960 ci_label.serial_number = component_label->serial_number;
961 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
962 ci_label.num_rows = raidPtrs[unit]->numRow;
963 ci_label.num_columns = raidPtrs[unit]->numCol;
964 ci_label.clean = RF_RAID_DIRTY; /* not clean */
965 ci_label.status = rf_ds_optimal; /* "It's good!" */
966
967 for(row=0;row<raidPtrs[unit]->numRow;row++) {
968 ci_label.row = row;
969 for(column=0;column<raidPtrs[unit]->numCol;column++) {
970 ci_label.column = column;
971 raidwrite_component_label(
972 raidPtrs[unit]->Disks[row][column].dev,
973 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
974 &ci_label );
975 }
976 }
977
978 return (retcode);
979
980 /* initialize all parity */
981 case RAIDFRAME_REWRITEPARITY:
982
983 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
984 /* Parity for RAID 0 is trivially correct */
985 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
986 return(0);
987 }
988
989 /* borrow the thread of the requesting process */
990 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
991 s = splbio();
992 retcode = rf_RewriteParity(raidPtrs[unit]);
993 splx(s);
994 /* return I/O Error if the parity rewrite fails */
995
996 if (retcode) {
997 retcode = EIO;
998 } else {
999 /* set the clean bit! If we shutdown correctly,
1000 the clean bit on each component label will get
1001 set */
1002 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
1003 }
1004 return (retcode);
1005
1006
1007 case RAIDFRAME_ADD_HOT_SPARE:
1008 sparePtr = (RF_SingleComponent_t *) data;
1009 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1010 printf("Adding spare\n");
1011 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1012 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
1013 return(retcode);
1014
1015 case RAIDFRAME_REMOVE_HOT_SPARE:
1016 return(retcode);
1017
1018 case RAIDFRAME_REBUILD_IN_PLACE:
1019
1020 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1021 /* Can't do this on a RAID 0!! */
1022 return(EINVAL);
1023 }
1024
1025 componentPtr = (RF_SingleComponent_t *) data;
1026 memcpy( &component, componentPtr,
1027 sizeof(RF_SingleComponent_t));
1028 row = component.row;
1029 column = component.column;
1030 printf("Rebuild: %d %d\n",row, column);
1031 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
1032 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
1033 return(EINVAL);
1034 }
1035 printf("Attempting a rebuild in place\n");
1036 s = splbio();
1037 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1038 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1039 splx(s);
1040 return(retcode);
1041
1042 /* issue a test-unit-ready through raidframe to the indicated
1043 * device */
1044 #if 0 /* XXX not supported yet (ever?) */
1045 case RAIDFRAME_TUR:
1046 /* debug only */
1047 retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
1048 return (retcode);
1049 #endif
1050 case RAIDFRAME_GET_INFO:
1051 {
1052 RF_Raid_t *raid = raidPtrs[unit];
1053 RF_DeviceConfig_t *cfg, **ucfgp;
1054 int i, j, d;
1055
1056 if (!raid->valid)
1057 return (ENODEV);
1058 ucfgp = (RF_DeviceConfig_t **) data;
1059 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1060 (RF_DeviceConfig_t *));
1061 if (cfg == NULL)
1062 return (ENOMEM);
1063 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1064 cfg->rows = raid->numRow;
1065 cfg->cols = raid->numCol;
1066 cfg->ndevs = raid->numRow * raid->numCol;
1067 if (cfg->ndevs >= RF_MAX_DISKS) {
1068 cfg->ndevs = 0;
1069 return (ENOMEM);
1070 }
1071 cfg->nspares = raid->numSpare;
1072 if (cfg->nspares >= RF_MAX_DISKS) {
1073 cfg->nspares = 0;
1074 return (ENOMEM);
1075 }
1076 cfg->maxqdepth = raid->maxQueueDepth;
1077 d = 0;
1078 for (i = 0; i < cfg->rows; i++) {
1079 for (j = 0; j < cfg->cols; j++) {
1080 cfg->devs[d] = raid->Disks[i][j];
1081 d++;
1082 }
1083 }
1084 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1085 cfg->spares[i] = raid->Disks[0][j];
1086 }
1087 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1088 sizeof(RF_DeviceConfig_t));
1089 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1090
1091 return (retcode);
1092 }
1093 break;
1094
1095 case RAIDFRAME_RESET_ACCTOTALS:
1096 {
1097 RF_Raid_t *raid = raidPtrs[unit];
1098
1099 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1100 return (0);
1101 }
1102 break;
1103
1104 case RAIDFRAME_GET_ACCTOTALS:
1105 {
1106 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1107 RF_Raid_t *raid = raidPtrs[unit];
1108
1109 *totals = raid->acc_totals;
1110 return (0);
1111 }
1112 break;
1113
1114 case RAIDFRAME_KEEP_ACCTOTALS:
1115 {
1116 RF_Raid_t *raid = raidPtrs[unit];
1117 int *keep = (int *) data;
1118
1119 raid->keep_acc_totals = *keep;
1120 return (0);
1121 }
1122 break;
1123
1124 case RAIDFRAME_GET_SIZE:
1125 *(int *) data = raidPtrs[unit]->totalSectors;
1126 return (0);
1127
1128 #define RAIDFRAME_RECON 1
1129 /* XXX The above should probably be set somewhere else!! GO */
1130 #if RAIDFRAME_RECON > 0
1131
1132 /* fail a disk & optionally start reconstruction */
1133 case RAIDFRAME_FAIL_DISK:
1134
1135 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1136 /* Can't do this on a RAID 0!! */
1137 return(EINVAL);
1138 }
1139
1140 rr = (struct rf_recon_req *) data;
1141
1142 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1143 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1144 return (EINVAL);
1145
1146 printf("raid%d: Failing the disk: row: %d col: %d\n",
1147 unit, rr->row, rr->col);
1148
1149 /* make a copy of the recon request so that we don't rely on
1150 * the user's buffer */
1151 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1152 bcopy(rr, rrcopy, sizeof(*rr));
1153 rrcopy->raidPtr = (void *) raidPtrs[unit];
1154
1155 LOCK_RECON_Q_MUTEX();
1156 rrcopy->next = recon_queue;
1157 recon_queue = rrcopy;
1158 wakeup(&recon_queue);
1159 UNLOCK_RECON_Q_MUTEX();
1160
1161 return (0);
1162
1163 /* invoke a copyback operation after recon on whatever disk
1164 * needs it, if any */
1165 case RAIDFRAME_COPYBACK:
1166
1167 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1168 /* This makes no sense on a RAID 0!! */
1169 return(EINVAL);
1170 }
1171
1172 /* borrow the current thread to get this done */
1173 raidPtrs[unit]->proc = p; /* ICK.. but needed :-p GO */
1174 s = splbio();
1175 rf_CopybackReconstructedData(raidPtrs[unit]);
1176 splx(s);
1177 return (0);
1178
1179 /* return the percentage completion of reconstruction */
1180 case RAIDFRAME_CHECKRECON:
1181 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1182 /* This makes no sense on a RAID 0 */
1183 return(EINVAL);
1184 }
1185
1186 row = *(int *) data;
1187 if (row < 0 || row >= raidPtrs[unit]->numRow)
1188 return (EINVAL);
1189 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1190 *(int *) data = 100;
1191 else
1192 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1193 return (0);
1194
1195 /* the sparetable daemon calls this to wait for the kernel to
1196 * need a spare table. this ioctl does not return until a
1197 * spare table is needed. XXX -- calling mpsleep here in the
1198 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1199 * -- I should either compute the spare table in the kernel,
1200 * or have a different -- XXX XXX -- interface (a different
1201 * character device) for delivering the table -- XXX */
1202 #if 0
1203 case RAIDFRAME_SPARET_WAIT:
1204 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1205 while (!rf_sparet_wait_queue)
1206 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1207 waitreq = rf_sparet_wait_queue;
1208 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1209 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1210
1211 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1212
1213 RF_Free(waitreq, sizeof(*waitreq));
1214 return (0);
1215
1216
1217 /* wakes up a process waiting on SPARET_WAIT and puts an error
1218 * code in it that will cause the dameon to exit */
1219 case RAIDFRAME_ABORT_SPARET_WAIT:
1220 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1221 waitreq->fcol = -1;
1222 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1223 waitreq->next = rf_sparet_wait_queue;
1224 rf_sparet_wait_queue = waitreq;
1225 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1226 wakeup(&rf_sparet_wait_queue);
1227 return (0);
1228
1229 /* used by the spare table daemon to deliver a spare table
1230 * into the kernel */
1231 case RAIDFRAME_SEND_SPARET:
1232
1233 /* install the spare table */
1234 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1235
1236 /* respond to the requestor. the return status of the spare
1237 * table installation is passed in the "fcol" field */
1238 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1239 waitreq->fcol = retcode;
1240 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1241 waitreq->next = rf_sparet_resp_queue;
1242 rf_sparet_resp_queue = waitreq;
1243 wakeup(&rf_sparet_resp_queue);
1244 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1245
1246 return (retcode);
1247 #endif
1248
1249
1250 #endif /* RAIDFRAME_RECON > 0 */
1251
1252 default:
1253 break; /* fall through to the os-specific code below */
1254
1255 }
1256
1257 if (!raidPtrs[unit]->valid)
1258 return (EINVAL);
1259
1260 /*
1261 * Add support for "regular" device ioctls here.
1262 */
1263
1264 switch (cmd) {
1265 case DIOCGDINFO:
1266 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1267 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1268 break;
1269
1270 case DIOCGPART:
1271 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1272 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1273 ((struct partinfo *) data)->part =
1274 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1275 break;
1276
1277 case DIOCWDINFO:
1278 db1_printf(("DIOCWDINFO\n"));
1279 case DIOCSDINFO:
1280 db1_printf(("DIOCSDINFO\n"));
1281 if ((error = raidlock(rs)) != 0)
1282 return (error);
1283
1284 rs->sc_flags |= RAIDF_LABELLING;
1285
1286 error = setdisklabel(rs->sc_dkdev.dk_label,
1287 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1288 if (error == 0) {
1289 if (cmd == DIOCWDINFO)
1290 error = writedisklabel(RAIDLABELDEV(dev),
1291 raidstrategy, rs->sc_dkdev.dk_label,
1292 rs->sc_dkdev.dk_cpulabel);
1293 }
1294 rs->sc_flags &= ~RAIDF_LABELLING;
1295
1296 raidunlock(rs);
1297
1298 if (error)
1299 return (error);
1300 break;
1301
1302 case DIOCWLABEL:
1303 db1_printf(("DIOCWLABEL\n"));
1304 if (*(int *) data != 0)
1305 rs->sc_flags |= RAIDF_WLABEL;
1306 else
1307 rs->sc_flags &= ~RAIDF_WLABEL;
1308 break;
1309
1310 case DIOCGDEFLABEL:
1311 db1_printf(("DIOCGDEFLABEL\n"));
1312 raidgetdefaultlabel(raidPtrs[unit], rs,
1313 (struct disklabel *) data);
1314 break;
1315
1316 default:
1317 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1318 }
1319 return (retcode);
1320
1321 }
1322
1323
1324 /* raidinit -- complete the rest of the initialization for the
1325 RAIDframe device. */
1326
1327
1328 static int
1329 raidinit(dev, raidPtr, unit)
1330 dev_t dev;
1331 RF_Raid_t *raidPtr;
1332 int unit;
1333 {
1334 int retcode;
1335 /* int ix; */
1336 /* struct raidbuf *raidbp; */
1337 struct raid_softc *rs;
1338
1339 retcode = 0;
1340
1341 rs = &raid_softc[unit];
1342 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1343 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1344
1345
1346 /* XXX should check return code first... */
1347 rs->sc_flags |= RAIDF_INITED;
1348
1349 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1350
1351 rs->sc_dkdev.dk_name = rs->sc_xname;
1352
1353 /* disk_attach actually creates space for the CPU disklabel, among
1354 * other things, so it's critical to call this *BEFORE* we try putzing
1355 * with disklabels. */
1356
1357 disk_attach(&rs->sc_dkdev);
1358
1359 /* XXX There may be a weird interaction here between this, and
1360 * protectedSectors, as used in RAIDframe. */
1361
1362 rs->sc_size = raidPtr->totalSectors;
1363 rs->sc_dev = dev;
1364
1365 return (retcode);
1366 }
1367
1368 /*
1369 * This kernel thread never exits. It is created once, and persists
1370 * until the system reboots.
1371 */
1372
void
rf_ReconKernelThread()
{
	struct rf_recon_req *req;
	int s;

	/* XXX not sure what spl() level we should be at here... probably
	 * splbio() */
	s = splbio();

	while (1) {
		/* grab the next reconstruction request from the queue */
		LOCK_RECON_Q_MUTEX();
		/* sleep until a request is queued (RAIDFRAME_FAIL_DISK
		 * enqueues and wakeup()s); drop the mutex while asleep and
		 * re-take it before re-testing the condition */
		while (!recon_queue) {
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO,
			       "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		/* pop the head of the singly-linked request list */
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
		 * If flags specifies that we should start recon, this call
		 * will not return until reconstruction completes, fails,
		 * or is aborted.
		 */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

		/* the request was malloc'd by the ioctl path; we own and
		 * free it here */
		RF_Free(req, sizeof(*req));
	}
}
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	/* hand the request to the user-level daemon blocked in
	 * RAIDFRAME_SPARET_WAIT */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): the mpsleep this replaced released the mutex while
	 * asleep; tsleep() does not, so rf_sparet_wait_mutex appears to be
	 * held across the sleep below -- confirm this is safe with the
	 * RF_LOCK_MUTEX implementation in use. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		       "raidframe getsparetable", 0);
#if 0
		mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
		    (void *) simple_lock_addr(rf_sparet_wait_mutex),
		    MS_LOCK_SIMPLE);
#endif
	}
	/* pop the daemon's response off the response queue */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* the daemon passes its status back in fcol */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1444 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1445 * bp & passes it down.
1446 * any calls originating in the kernel must use non-blocking I/O
1447 * do some extra sanity checking to return "appropriate" error values for
1448 * certain conditions (to make some standard utilities work)
1449 *
1450 * Formerly known as: rf_DoAccessKernel
1451 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	struct buf *dp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Check to see if we're at the limit... */
	RF_LOCK_MUTEX(raidPtr->mutex);
	/* drain the per-unit buf queue while there are openings; the mutex
	 * is held only while testing/adjusting openings */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		dp = &rs->buf_queue;
		bp = dp->b_actf;
		if (bp == NULL) {
			/* nothing more to do */
			return;
		}

		/* update structures: unlink bp from the doubly-linked
		 * b_actf/b_actb queue headed at rs->buf_queue */
		dp = bp->b_actf;
		if (dp != NULL) {
			dp->b_actb = bp->b_actb;
		} else {
			rs->buf_queue.b_actb = bp->b_actb;
		}
		*bp->b_actb = dp;

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/* reject requests past the end of the array; the extra
		 * (sum < x) tests catch arithmetic wraparound */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that are not a multiple of the sector
		 * size */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* NOTE(review): retcode is assigned but never examined;
		 * presumably errors surface through the biodone path --
		 * confirm against rf_DoAccess. */
		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
		    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
		    do_async, raid_addr, num_blocks,
		    bp->b_un.b_addr, bp, NULL, NULL,
		    RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);


		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1571
1572
1573
1574
1575 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1576
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	/* clear any stale error state left on the buffer */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* grab a raidbuf wrapper from the per-unit pool */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NOTE(review): the doubled parens make this a plain printf
		 * of the string; presumably db1_printf was intended --
		 * confirm. */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* complete immediately: no physical I/O for a NOP */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* set up the nested buf aimed at the component device;
		 * KernelWakeupFunc fires when it completes */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must bump v_numoutput before VOP_STRATEGY */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
/* this is the callback function associated with an I/O invoked from
   kernel code.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* the buf handed to us is the first member of a raidbuf wrapper */
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	/* bp is the original caller's buffer; raidbp->rf_buf is the nested
	 * buf that just completed on the component */
	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate component-level errors up to the original buffer */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* accumulate per-access tracing statistics, if enabled */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* return the raidbuf wrapper to the per-unit pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* notify the queueing layer and the DAG engine of completion */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
1790
1791
1792
1793 /*
1794 * initialize a buf structure for doing an I/O in the kernel.
1795 */
1796 static void
1797 InitBP(
1798 struct buf * bp,
1799 struct vnode * b_vp,
1800 unsigned rw_flag,
1801 dev_t dev,
1802 RF_SectorNum_t startSect,
1803 RF_SectorCount_t numSect,
1804 caddr_t buf,
1805 void (*cbFunc) (struct buf *),
1806 void *cbArg,
1807 int logBytesPerSector,
1808 struct proc * b_proc)
1809 {
1810 /* bp->b_flags = B_PHYS | rw_flag; */
1811 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1812 bp->b_bcount = numSect << logBytesPerSector;
1813 bp->b_bufsize = bp->b_bcount;
1814 bp->b_error = 0;
1815 bp->b_dev = dev;
1816 db1_printf(("bp->b_dev is %d\n", dev));
1817 bp->b_un.b_addr = buf;
1818 #if 0
1819 db1_printf(("bp->b_data=0x%x\n", bp->b_data));
1820 #endif
1821
1822 bp->b_blkno = startSect;
1823 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1824 db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
1825 if (bp->b_bcount == 0) {
1826 panic("bp->b_bcount is zero in InitBP!!\n");
1827 }
1828 bp->b_proc = b_proc;
1829 bp->b_iodone = cbFunc;
1830 bp->b_vp = b_vp;
1831
1832 }
1833 /* Extras... */
1834
/*
 * Stubbed cycle-counter read.  The original author guessed this was
 * meant to read the CPU cycle counter; on this port it always yields 0.
 */
unsigned int
rpcc()
{
	return (0);
}
#if 0
/*
 * Dead code: an earlier stub of rf_GetSpareTableFromDaemon(), superseded
 * by the live implementation above.  Kept under #if 0; never compiled.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode = 1;
	printf("This is supposed to do something useful!!\n");	/* XXX */

	return (retcode);

}
#endif
1855
1856 static void
1857 raidgetdefaultlabel(raidPtr, rs, lp)
1858 RF_Raid_t *raidPtr;
1859 struct raid_softc *rs;
1860 struct disklabel *lp;
1861 {
1862 db1_printf(("Building a default label...\n"));
1863 bzero(lp, sizeof(*lp));
1864
1865 /* fabricate a label... */
1866 lp->d_secperunit = raidPtr->totalSectors;
1867 lp->d_secsize = raidPtr->bytesPerSector;
1868 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1869 lp->d_ntracks = 1;
1870 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1871 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1872
1873 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1874 lp->d_type = DTYPE_RAID;
1875 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1876 lp->d_rpm = 3600;
1877 lp->d_interleave = 1;
1878 lp->d_flags = 0;
1879
1880 lp->d_partitions[RAW_PART].p_offset = 0;
1881 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1882 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1883 lp->d_npartitions = RAW_PART + 1;
1884
1885 lp->d_magic = DISKMAGIC;
1886 lp->d_magic2 = DISKMAGIC;
1887 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1888
1889 }
1890 /*
1891 * Read the disklabel from the raid device. If one is not present, fake one
1892 * up.
1893 */
static void
raidgetdisklabel(dev)
	dev_t dev;
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a fabricated label; readdisklabel overwrites it if a
	 * real one is found on the device */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* no label on disk: finish off the fabricated one */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since the total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * the same components are used, and an old disklabel may be
		 * used if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
1947 /*
1948 * Take care of things one might want to take care of in the event
1949 * that a disklabel isn't present.
1950 */
1951 static void
1952 raidmakedisklabel(rs)
1953 struct raid_softc *rs;
1954 {
1955 struct disklabel *lp = rs->sc_dkdev.dk_label;
1956 db1_printf(("Making a label..\n"));
1957
1958 /*
1959 * For historical reasons, if there's no disklabel present
1960 * the raw partition must be marked FS_BSDFFS.
1961 */
1962
1963 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1964
1965 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1966
1967 lp->d_checksum = dkcksum(lp);
1968 }
1969 /*
1970 * Lookup the provided name in the filesystem. If the file exists,
1971 * is a valid block device, and isn't being used by anyone else,
1972 * set *vpp to the file's vnode.
1973 * You'll find the original of this in ccd.c
1974 */
int
raidlookup(path, p, vpp)
	char *path;
	struct proc *p;
	struct vnode **vpp;	/* result */
{
	struct nameidata nd;
	struct vnode *vp;
	struct vattr va;
	int error;

	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
#ifdef DEBUG
		printf("RAIDframe: vn_open returned %d\n", error);
#endif
		return (error);
	}
	/* vn_open returns the vnode locked; every error path below must
	 * unlock and close it */
	vp = nd.ni_vp;
	/* refuse a device that is already open elsewhere */
	if (vp->v_usecount > 1) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (EBUSY);
	}
	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (error);
	}
	/* XXX: eventually we should handle VREG, too. */
	if (va.va_type != VBLK) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (ENOTBLK);
	}
	/* success: hand back the (unlocked, still-open) vnode */
	VOP_UNLOCK(vp, 0);
	*vpp = vp;
	return (0);
}
2014 /*
2015 * Wait interruptibly for an exclusive lock.
2016 *
2017 * XXX
2018 * Several drivers do this; it should be abstracted and made MP-safe.
2019 * (Hmm... where have we seen this warning before :-> GO )
2020 */
2021 static int
2022 raidlock(rs)
2023 struct raid_softc *rs;
2024 {
2025 int error;
2026
2027 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2028 rs->sc_flags |= RAIDF_WANTED;
2029 if ((error =
2030 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2031 return (error);
2032 }
2033 rs->sc_flags |= RAIDF_LOCKED;
2034 return (0);
2035 }
2036 /*
2037 * Unlock and wake up any waiters.
2038 */
2039 static void
2040 raidunlock(rs)
2041 struct raid_softc *rs;
2042 {
2043
2044 rs->sc_flags &= ~RAIDF_LOCKED;
2045 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2046 rs->sc_flags &= ~RAIDF_WANTED;
2047 wakeup(rs);
2048 }
2049 }
2050
2051
2052 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2053 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2054
2055 int
2056 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2057 {
2058 RF_ComponentLabel_t component_label;
2059 raidread_component_label(dev, b_vp, &component_label);
2060 component_label.mod_counter = mod_counter;
2061 component_label.clean = RF_RAID_CLEAN;
2062 raidwrite_component_label(dev, b_vp, &component_label);
2063 return(0);
2064 }
2065
2066
2067 int
2068 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2069 {
2070 RF_ComponentLabel_t component_label;
2071 raidread_component_label(dev, b_vp, &component_label);
2072 component_label.mod_counter = mod_counter;
2073 component_label.clean = RF_RAID_DIRTY;
2074 raidwrite_component_label(dev, b_vp, &component_label);
2075 return(0);
2076 }
2077
2078 /* ARGSUSED */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* issue the read directly through the component's block driver
	 * and wait for it synchronously */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		/* the label occupies the start of the protected region */
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* the buffer held raw label data; don't let it linger in the
	 * cache */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
2128 /* ARGSUSED */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero-fill the block so bytes past the label are deterministic */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* issue the write directly through the component's block driver
	 * and wait for it synchronously */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2162
/*
 * Bump the array's modification counter and mark the component label of
 * every non-failed component dirty (via raidmarkdirty()), so that an
 * unclean shutdown can later be detected.  Components whose on-disk
 * label says rf_ds_spared are deliberately left alone.  Spare disks are
 * not handled yet — see the #if 0 sketch at the bottom.
 */
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				/* NOTE(review): the return value of this
				   read is ignored; on an I/O error c_label
				   holds stale data — TODO confirm this is
				   acceptable here. */
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
#if 0
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
2257
2258
2259 void
2260 rf_update_component_labels( raidPtr )
2261 RF_Raid_t *raidPtr;
2262 {
2263 RF_ComponentLabel_t c_label;
2264 int sparecol;
2265 int r,c;
2266 int i,j;
2267 int srow, scol;
2268
2269 srow = -1;
2270 scol = -1;
2271
2272 /* XXX should do extra checks to make sure things really are clean,
2273 rather than blindly setting the clean bit... */
2274
2275 raidPtr->mod_counter++;
2276
2277 for (r = 0; r < raidPtr->numRow; r++) {
2278 for (c = 0; c < raidPtr->numCol; c++) {
2279 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2280 raidread_component_label(
2281 raidPtr->Disks[r][c].dev,
2282 raidPtr->raid_cinfo[r][c].ci_vp,
2283 &c_label);
2284 /* make sure status is noted */
2285 c_label.status = rf_ds_optimal;
2286 raidwrite_component_label(
2287 raidPtr->Disks[r][c].dev,
2288 raidPtr->raid_cinfo[r][c].ci_vp,
2289 &c_label);
2290 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2291 raidmarkclean(
2292 raidPtr->Disks[r][c].dev,
2293 raidPtr->raid_cinfo[r][c].ci_vp,
2294 raidPtr->mod_counter);
2295 }
2296 }
2297 /* else we don't touch it.. */
2298 #if 0
2299 else if (raidPtr->Disks[r][c].status !=
2300 rf_ds_failed) {
2301 raidread_component_label(
2302 raidPtr->Disks[r][c].dev,
2303 raidPtr->raid_cinfo[r][c].ci_vp,
2304 &c_label);
2305 /* make sure status is noted */
2306 c_label.status =
2307 raidPtr->Disks[r][c].status;
2308 raidwrite_component_label(
2309 raidPtr->Disks[r][c].dev,
2310 raidPtr->raid_cinfo[r][c].ci_vp,
2311 &c_label);
2312 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2313 raidmarkclean(
2314 raidPtr->Disks[r][c].dev,
2315 raidPtr->raid_cinfo[r][c].ci_vp,
2316 raidPtr->mod_counter);
2317 }
2318 }
2319 #endif
2320 }
2321 }
2322
2323 for( c = 0; c < raidPtr->numSpare ; c++) {
2324 sparecol = raidPtr->numCol + c;
2325 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2326 /*
2327
2328 we claim this disk is "optimal" if it's
2329 rf_ds_used_spare, as that means it should be
2330 directly substitutable for the disk it replaced.
2331 We note that too...
2332
2333 */
2334
2335 for(i=0;i<raidPtr->numRow;i++) {
2336 for(j=0;j<raidPtr->numCol;j++) {
2337 if ((raidPtr->Disks[i][j].spareRow ==
2338 0) &&
2339 (raidPtr->Disks[i][j].spareCol ==
2340 sparecol)) {
2341 srow = i;
2342 scol = j;
2343 break;
2344 }
2345 }
2346 }
2347
2348 raidread_component_label(
2349 raidPtr->Disks[0][sparecol].dev,
2350 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2351 &c_label);
2352 /* make sure status is noted */
2353 c_label.version = RF_COMPONENT_LABEL_VERSION;
2354 c_label.mod_counter = raidPtr->mod_counter;
2355 c_label.serial_number = raidPtr->serial_number;
2356 c_label.row = srow;
2357 c_label.column = scol;
2358 c_label.num_rows = raidPtr->numRow;
2359 c_label.num_columns = raidPtr->numCol;
2360 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2361 c_label.status = rf_ds_optimal;
2362 raidwrite_component_label(
2363 raidPtr->Disks[0][sparecol].dev,
2364 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2365 &c_label);
2366 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2367 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2368 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2369 raidPtr->mod_counter);
2370 }
2371 }
2372 }
2373 /* printf("Component labels updated\n"); */
2374 }
2375