rf_netbsdkintf.c revision 1.16.2.8 1 /* $NetBSD: rf_netbsdkintf.c,v 1.16.2.8 1999/12/20 13:38:41 he Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
154 int rf_kdebug_level = 0;
155
156 #define RFK_BOOT_NONE 0
157 #define RFK_BOOT_GOOD 1
158 #define RFK_BOOT_BAD 2
159 static int rf_kbooted = RFK_BOOT_NONE;
160
161 #ifdef DEBUG
162 #define db0_printf(a) printf a
163 #define db_printf(a) if (rf_kdebug_level > 0) printf a
164 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
165 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
166 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
167 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
168 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
169 #else /* DEBUG */
170 #define db0_printf(a) printf a
171 #define db1_printf(a) { }
172 #define db2_printf(a) { }
173 #define db3_printf(a) { }
174 #define db4_printf(a) { }
175 #define db5_printf(a) { }
176 #endif /* DEBUG */
177
178 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
179
180 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
181
182 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
183 * spare table */
184 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
185 * installation process */
186
187 static struct rf_recon_req *recon_queue = NULL; /* used to communicate
188 * reconstruction
189 * requests */
190
191
192 decl_simple_lock_data(, recon_queue_mutex)
193 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
194 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
195
196 /* prototypes */
197 static void KernelWakeupFunc(struct buf * bp);
198 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
199 dev_t dev, RF_SectorNum_t startSect,
200 RF_SectorCount_t numSect, caddr_t buf,
201 void (*cbFunc) (struct buf *), void *cbArg,
202 int logBytesPerSector, struct proc * b_proc);
203
204 #define Dprintf0(s) if (rf_queueDebug) \
205 rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
206 #define Dprintf1(s,a) if (rf_queueDebug) \
207 rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
208 #define Dprintf2(s,a,b) if (rf_queueDebug) \
209 rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
210 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
211 rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
212
213 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
214 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
215
216 void raidattach __P((int));
217 int raidsize __P((dev_t));
218
219 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
220 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
221 static int raidinit __P((dev_t, RF_Raid_t *, int));
222
223 int raidopen __P((dev_t, int, int, struct proc *));
224 int raidclose __P((dev_t, int, int, struct proc *));
225 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
226 int raidwrite __P((dev_t, struct uio *, int));
227 int raidread __P((dev_t, struct uio *, int));
228 void raidstrategy __P((struct buf *));
229 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
230
231 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
232 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
233 void rf_update_component_labels( RF_Raid_t *);
234 /*
235 * Pilfered from ccd.c
236 */
237
/*
 * Per-component I/O descriptor, modelled on ccd.c's ccdbuf.  One of
 * these wraps each struct buf sent to an underlying component; the
 * embedded buf must be the first member so the completion callback can
 * cast the struct buf * it receives back to a struct raidbuf *.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int rf_flags;		/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
244
245
246 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
247 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
248
249 /* XXX Not sure if the following should be replacing the raidPtrs above,
250 or if it should be used in conjunction with that... */
251
/*
 * Per-unit software state for a RAID pseudo-disk, one entry per unit in
 * the raid_softc[] array allocated by raidattach().  Pairs with the
 * RF_Raid_t in raidPtrs[] for the same unit (see XXX note above).
 */
struct raid_softc {
	int sc_flags;		/* flags (RAIDF_* below) */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	dev_t sc_dev;		/* our device.. */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
	struct buf buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
268
269 #define raidunit(x) DISKUNIT(x)
270 static int numraid = 0;
271
272 /*
273 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
274 * Be aware that large numbers can allow the driver to consume a lot of
275 * kernel memory, especially on writes...
276 */
277
278 #ifndef RAIDOUTSTANDING
279 #define RAIDOUTSTANDING 10
280 #endif
281
282 #define RAIDLABELDEV(dev) \
283 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
284
285 /* declared here, and made public, for the benefit of KVM stuff.. */
286 struct raid_softc *raid_softc;
287
288 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
289 struct disklabel *));
290 static void raidgetdisklabel __P((dev_t));
291 static void raidmakedisklabel __P((struct raid_softc *));
292
293 static int raidlock __P((struct raid_softc *));
294 static void raidunlock __P((struct raid_softc *));
295 int raidlookup __P((char *, struct proc * p, struct vnode **));
296
297 static void rf_markalldirty __P((RF_Raid_t *));
298
299 void
300 raidattach(num)
301 int num;
302 {
303 int raidID;
304 int i, rc;
305
306 #ifdef DEBUG
307 printf("raidattach: Asked for %d units\n", num);
308 #endif
309
310 if (num <= 0) {
311 #ifdef DIAGNOSTIC
312 panic("raidattach: count <= 0");
313 #endif
314 return;
315 }
316 /* This is where all the initialization stuff gets done. */
317
318 /* Make some space for requested number of units... */
319
320 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
321 if (raidPtrs == NULL) {
322 panic("raidPtrs is NULL!!\n");
323 }
324
325 rc = rf_mutex_init(&rf_sparet_wait_mutex);
326 if (rc) {
327 RF_PANIC();
328 }
329
330 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
331 recon_queue = NULL;
332
333 for (i = 0; i < numraid; i++)
334 raidPtrs[i] = NULL;
335 rc = rf_BootRaidframe();
336 if (rc == 0)
337 printf("Kernelized RAIDframe activated\n");
338 else
339 panic("Serious error booting RAID!!\n");
340
341 rf_kbooted = RFK_BOOT_GOOD;
342
343 /* put together some datastructures like the CCD device does.. This
344 * lets us lock the device and what-not when it gets opened. */
345
346 raid_softc = (struct raid_softc *)
347 malloc(num * sizeof(struct raid_softc),
348 M_RAIDFRAME, M_NOWAIT);
349 if (raid_softc == NULL) {
350 printf("WARNING: no memory for RAIDframe driver\n");
351 return;
352 }
353 numraid = num;
354 bzero(raid_softc, num * sizeof(struct raid_softc));
355
356 for (raidID = 0; raidID < num; raidID++) {
357 raid_softc[raidID].buf_queue.b_actf = NULL;
358 raid_softc[raidID].buf_queue.b_actb =
359 &raid_softc[raidID].buf_queue.b_actf;
360 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
361 (RF_Raid_t *));
362 if (raidPtrs[raidID] == NULL) {
363 printf("raidPtrs[%d] is NULL\n", raidID);
364 }
365 }
366 }
367
368
369 int
370 raidsize(dev)
371 dev_t dev;
372 {
373 struct raid_softc *rs;
374 struct disklabel *lp;
375 int part, unit, omask, size;
376
377 unit = raidunit(dev);
378 if (unit >= numraid)
379 return (-1);
380 rs = &raid_softc[unit];
381
382 if ((rs->sc_flags & RAIDF_INITED) == 0)
383 return (-1);
384
385 part = DISKPART(dev);
386 omask = rs->sc_dkdev.dk_openmask & (1 << part);
387 lp = rs->sc_dkdev.dk_label;
388
389 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
390 return (-1);
391
392 if (lp->d_partitions[part].p_fstype != FS_SWAP)
393 size = -1;
394 else
395 size = lp->d_partitions[part].p_size *
396 (lp->d_secsize / DEV_BSIZE);
397
398 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
399 return (-1);
400
401 return (size);
402
403 }
404
405 int
406 raiddump(dev, blkno, va, size)
407 dev_t dev;
408 daddr_t blkno;
409 caddr_t va;
410 size_t size;
411 {
412 /* Not implemented. */
413 return ENXIO;
414 }
/* ARGSUSED */
/*
 * raidopen: open entry point for both the block and character devices.
 * Takes the per-unit lock for the duration, (re)reads the disklabel on
 * the first open of a configured set, validates the partition, records
 * the open in the appropriate open-mask, and marks all components dirty
 * on the very first open.  `flags' and `p' are currently unused.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against concurrent open/close/unconfigure. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));


	/* First open of a configured unit: refresh the in-core disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	/* dk_openmask is still the pre-open value here, so ==0 means this
	 * is the very first opener of the set. */
	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		 here... If so, we needn't do this, but then need some
		 other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
494 /* ARGSUSED */
495 int
496 raidclose(dev, flags, fmt, p)
497 dev_t dev;
498 int flags, fmt;
499 struct proc *p;
500 {
501 int unit = raidunit(dev);
502 struct raid_softc *rs;
503 int error = 0;
504 int part;
505
506 if (unit >= numraid)
507 return (ENXIO);
508 rs = &raid_softc[unit];
509
510 if ((error = raidlock(rs)) != 0)
511 return (error);
512
513 part = DISKPART(dev);
514
515 /* ...that much closer to allowing unconfiguration... */
516 switch (fmt) {
517 case S_IFCHR:
518 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
519 break;
520
521 case S_IFBLK:
522 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
523 break;
524 }
525 rs->sc_dkdev.dk_openmask =
526 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
527
528 if ((rs->sc_dkdev.dk_openmask == 0) &&
529 ((rs->sc_flags & RAIDF_INITED) != 0)) {
530 /* Last one... device is not unconfigured yet.
531 Device shutdown has taken care of setting the
532 clean bits if RAIDF_INITED is not set
533 mark things as clean... */
534 rf_update_component_labels( raidPtrs[unit] );
535 }
536
537 raidunlock(rs);
538 return (0);
539
540 }
541
542 void
543 raidstrategy(bp)
544 register struct buf *bp;
545 {
546 register int s;
547
548 unsigned int raidID = raidunit(bp->b_dev);
549 RF_Raid_t *raidPtr;
550 struct raid_softc *rs = &raid_softc[raidID];
551 struct disklabel *lp;
552 struct buf *dp;
553 int wlabel;
554
555 #if 0
556 db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
557 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
558 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
559 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
560 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
561
562 if (bp->b_flags & B_READ)
563 db1_printf(("READ\n"));
564 else
565 db1_printf(("WRITE\n"));
566 #endif
567 if (rf_kbooted != RFK_BOOT_GOOD)
568 return;
569 if (raidID >= numraid || !raidPtrs[raidID]) {
570 bp->b_error = ENODEV;
571 bp->b_flags |= B_ERROR;
572 bp->b_resid = bp->b_bcount;
573 biodone(bp);
574 return;
575 }
576 raidPtr = raidPtrs[raidID];
577 if (!raidPtr->valid) {
578 bp->b_error = ENODEV;
579 bp->b_flags |= B_ERROR;
580 bp->b_resid = bp->b_bcount;
581 biodone(bp);
582 return;
583 }
584 if (bp->b_bcount == 0) {
585 db1_printf(("b_bcount is zero..\n"));
586 biodone(bp);
587 return;
588 }
589 lp = rs->sc_dkdev.dk_label;
590
591 /*
592 * Do bounds checking and adjust transfer. If there's an
593 * error, the bounds check will flag that for us.
594 */
595
596 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
597 if (DISKPART(bp->b_dev) != RAW_PART)
598 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
599 db1_printf(("Bounds check failed!!:%d %d\n",
600 (int) bp->b_blkno, (int) wlabel));
601 biodone(bp);
602 return;
603 }
604 s = splbio();
605
606 bp->b_resid = 0;
607
608 /* stuff it onto our queue */
609
610 dp = &rs->buf_queue;
611 bp->b_actf = NULL;
612 bp->b_actb = dp->b_actb;
613 *dp->b_actb = bp;
614 dp->b_actb = &bp->b_actf;
615
616 raidstart(raidPtrs[raidID]);
617
618 splx(s);
619 }
620 /* ARGSUSED */
621 int
622 raidread(dev, uio, flags)
623 dev_t dev;
624 struct uio *uio;
625 int flags;
626 {
627 int unit = raidunit(dev);
628 struct raid_softc *rs;
629 int part;
630
631 if (unit >= numraid)
632 return (ENXIO);
633 rs = &raid_softc[unit];
634
635 if ((rs->sc_flags & RAIDF_INITED) == 0)
636 return (ENXIO);
637 part = DISKPART(dev);
638
639 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
640
641 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
642
643 }
644 /* ARGSUSED */
645 int
646 raidwrite(dev, uio, flags)
647 dev_t dev;
648 struct uio *uio;
649 int flags;
650 {
651 int unit = raidunit(dev);
652 struct raid_softc *rs;
653
654 if (unit >= numraid)
655 return (ENXIO);
656 rs = &raid_softc[unit];
657
658 if ((rs->sc_flags & RAIDF_INITED) == 0)
659 return (ENXIO);
660 db1_printf(("raidwrite\n"));
661 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
662
663 }
664
665 int
666 raidioctl(dev, cmd, data, flag, p)
667 dev_t dev;
668 u_long cmd;
669 caddr_t data;
670 int flag;
671 struct proc *p;
672 {
673 int unit = raidunit(dev);
674 int error = 0;
675 int part, pmask;
676 struct raid_softc *rs;
677 #if 0
678 int r, c;
679 #endif
680 /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
681
682 /* struct ccdbuf *cbp; */
683 /* struct raidbuf *raidbp; */
684 RF_Config_t *k_cfg, *u_cfg;
685 u_char *specific_buf;
686 int retcode = 0;
687 int row;
688 int column;
689 int s;
690 struct rf_recon_req *rrcopy, *rr;
691 RF_ComponentLabel_t *component_label;
692 RF_ComponentLabel_t ci_label;
693 RF_ComponentLabel_t **c_label_ptr;
694 RF_SingleComponent_t *sparePtr,*componentPtr;
695 RF_SingleComponent_t hot_spare;
696 RF_SingleComponent_t component;
697
698 if (unit >= numraid)
699 return (ENXIO);
700 rs = &raid_softc[unit];
701
702 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
703 (int) DISKPART(dev), (int) unit, (int) cmd));
704
705 /* Must be open for writes for these commands... */
706 switch (cmd) {
707 case DIOCSDINFO:
708 case DIOCWDINFO:
709 case DIOCWLABEL:
710 if ((flag & FWRITE) == 0)
711 return (EBADF);
712 }
713
714 /* Must be initialized for these... */
715 switch (cmd) {
716 case DIOCGDINFO:
717 case DIOCSDINFO:
718 case DIOCWDINFO:
719 case DIOCGPART:
720 case DIOCWLABEL:
721 case DIOCGDEFLABEL:
722 case RAIDFRAME_SHUTDOWN:
723 case RAIDFRAME_REWRITEPARITY:
724 case RAIDFRAME_GET_INFO:
725 case RAIDFRAME_RESET_ACCTOTALS:
726 case RAIDFRAME_GET_ACCTOTALS:
727 case RAIDFRAME_KEEP_ACCTOTALS:
728 case RAIDFRAME_GET_SIZE:
729 case RAIDFRAME_FAIL_DISK:
730 case RAIDFRAME_COPYBACK:
731 case RAIDFRAME_CHECKRECON:
732 case RAIDFRAME_GET_COMPONENT_LABEL:
733 case RAIDFRAME_SET_COMPONENT_LABEL:
734 case RAIDFRAME_ADD_HOT_SPARE:
735 case RAIDFRAME_REMOVE_HOT_SPARE:
736 case RAIDFRAME_INIT_LABELS:
737 case RAIDFRAME_REBUILD_IN_PLACE:
738 if ((rs->sc_flags & RAIDF_INITED) == 0)
739 return (ENXIO);
740 }
741
742 switch (cmd) {
743
744
745 /* configure the system */
746 case RAIDFRAME_CONFIGURE:
747
748 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
749 /* copy-in the configuration information */
750 /* data points to a pointer to the configuration structure */
751 u_cfg = *((RF_Config_t **) data);
752 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
753 if (k_cfg == NULL) {
754 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
755 return (ENOMEM);
756 }
757 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
758 sizeof(RF_Config_t));
759 if (retcode) {
760 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
761 retcode));
762 return (retcode);
763 }
764 /* allocate a buffer for the layout-specific data, and copy it
765 * in */
766 if (k_cfg->layoutSpecificSize) {
767 if (k_cfg->layoutSpecificSize > 10000) {
768 /* sanity check */
769 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
770 return (EINVAL);
771 }
772 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
773 (u_char *));
774 if (specific_buf == NULL) {
775 RF_Free(k_cfg, sizeof(RF_Config_t));
776 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
777 return (ENOMEM);
778 }
779 retcode = copyin(k_cfg->layoutSpecific,
780 (caddr_t) specific_buf,
781 k_cfg->layoutSpecificSize);
782 if (retcode) {
783 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
784 retcode));
785 return (retcode);
786 }
787 } else
788 specific_buf = NULL;
789 k_cfg->layoutSpecific = specific_buf;
790
791 /* should do some kind of sanity check on the configuration.
792 * Store the sum of all the bytes in the last byte? */
793
794 #if 0
795 db1_printf(("Considering configuring the system.:%d 0x%x\n",
796 unit, p));
797 #endif
798
799 /* We need the pointer to this a little deeper, so stash it
800 * here... */
801
802 raidPtrs[unit]->proc = p;
803
804 /* configure the system */
805
806 raidPtrs[unit]->raidid = unit;
807
808 retcode = rf_Configure(raidPtrs[unit], k_cfg);
809
810 /* allow this many simultaneous IO's to this RAID device */
811 raidPtrs[unit]->openings = RAIDOUTSTANDING;
812
813 if (retcode == 0) {
814 retcode = raidinit(dev, raidPtrs[unit], unit);
815 rf_markalldirty( raidPtrs[unit] );
816 }
817 /* free the buffers. No return code here. */
818 if (k_cfg->layoutSpecificSize) {
819 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
820 }
821 RF_Free(k_cfg, sizeof(RF_Config_t));
822
823 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
824 retcode));
825
826 return (retcode);
827
828 /* shutdown the system */
829 case RAIDFRAME_SHUTDOWN:
830
831 if ((error = raidlock(rs)) != 0)
832 return (error);
833
834 /*
835 * If somebody has a partition mounted, we shouldn't
836 * shutdown.
837 */
838
839 part = DISKPART(dev);
840 pmask = (1 << part);
841 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
842 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
843 (rs->sc_dkdev.dk_copenmask & pmask))) {
844 raidunlock(rs);
845 return (EBUSY);
846 }
847
848 if (rf_debugKernelAccess) {
849 printf("call shutdown\n");
850 }
851 raidPtrs[unit]->proc = p; /* XXX necessary evil */
852
853 retcode = rf_Shutdown(raidPtrs[unit]);
854
855 db1_printf(("Done main shutdown\n"));
856
857 pool_destroy(&rs->sc_cbufpool);
858 db1_printf(("Done freeing component buffer freelist\n"));
859
860 /* It's no longer initialized... */
861 rs->sc_flags &= ~RAIDF_INITED;
862
863 /* Detach the disk. */
864 disk_detach(&rs->sc_dkdev);
865
866 raidunlock(rs);
867
868 return (retcode);
869 case RAIDFRAME_GET_COMPONENT_LABEL:
870 c_label_ptr = (RF_ComponentLabel_t **) data;
871 /* need to read the component label for the disk indicated
872 by row,column in component_label
873 XXX need to sanity check these values!!!
874 */
875
876 /* For practice, let's get it directly fromdisk, rather
877 than from the in-core copy */
878 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
879 (RF_ComponentLabel_t *));
880 if (component_label == NULL)
881 return (ENOMEM);
882
883 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
884
885 retcode = copyin( *c_label_ptr, component_label,
886 sizeof(RF_ComponentLabel_t));
887
888 if (retcode) {
889 return(retcode);
890 }
891
892 row = component_label->row;
893 column = component_label->column;
894
895 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
896 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
897 return(EINVAL);
898 }
899
900 raidread_component_label(
901 raidPtrs[unit]->Disks[row][column].dev,
902 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
903 component_label );
904
905 retcode = copyout((caddr_t) component_label,
906 (caddr_t) *c_label_ptr,
907 sizeof(RF_ComponentLabel_t));
908 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
909 return (retcode);
910
911 case RAIDFRAME_SET_COMPONENT_LABEL:
912 component_label = (RF_ComponentLabel_t *) data;
913
914 /* XXX check the label for valid stuff... */
915 /* Note that some things *should not* get modified --
916 the user should be re-initing the labels instead of
917 trying to patch things.
918 */
919
920 printf("Got component label:\n");
921 printf("Version: %d\n",component_label->version);
922 printf("Serial Number: %d\n",component_label->serial_number);
923 printf("Mod counter: %d\n",component_label->mod_counter);
924 printf("Row: %d\n", component_label->row);
925 printf("Column: %d\n", component_label->column);
926 printf("Num Rows: %d\n", component_label->num_rows);
927 printf("Num Columns: %d\n", component_label->num_columns);
928 printf("Clean: %d\n", component_label->clean);
929 printf("Status: %d\n", component_label->status);
930
931 row = component_label->row;
932 column = component_label->column;
933
934 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
935 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
936 return(EINVAL);
937 }
938
939 /* XXX this isn't allowed to do anything for now :-) */
940 #if 0
941 raidwrite_component_label(
942 raidPtrs[unit]->Disks[row][column].dev,
943 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
944 component_label );
945 #endif
946 return (0);
947
948 case RAIDFRAME_INIT_LABELS:
949 component_label = (RF_ComponentLabel_t *) data;
950 /*
951 we only want the serial number from
952 the above. We get all the rest of the information
953 from the config that was used to create this RAID
954 set.
955 */
956
957 raidPtrs[unit]->serial_number = component_label->serial_number;
958 /* current version number */
959 ci_label.version = RF_COMPONENT_LABEL_VERSION;
960 ci_label.serial_number = component_label->serial_number;
961 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
962 ci_label.num_rows = raidPtrs[unit]->numRow;
963 ci_label.num_columns = raidPtrs[unit]->numCol;
964 ci_label.clean = RF_RAID_DIRTY; /* not clean */
965 ci_label.status = rf_ds_optimal; /* "It's good!" */
966
967 for(row=0;row<raidPtrs[unit]->numRow;row++) {
968 ci_label.row = row;
969 for(column=0;column<raidPtrs[unit]->numCol;column++) {
970 ci_label.column = column;
971 raidwrite_component_label(
972 raidPtrs[unit]->Disks[row][column].dev,
973 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
974 &ci_label );
975 }
976 }
977
978 return (retcode);
979
980 /* initialize all parity */
981 case RAIDFRAME_REWRITEPARITY:
982
983 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
984 /* Parity for RAID 0 is trivially correct */
985 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
986 return(0);
987 }
988
989 /* borrow the thread of the requesting process */
990 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
991 retcode = rf_RewriteParity(raidPtrs[unit]);
992 /* return I/O Error if the parity rewrite fails */
993
994 if (retcode) {
995 retcode = EIO;
996 } else {
997 /* set the clean bit! If we shutdown correctly,
998 the clean bit on each component label will get
999 set */
1000 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
1001 }
1002 return (retcode);
1003
1004
1005 case RAIDFRAME_ADD_HOT_SPARE:
1006 sparePtr = (RF_SingleComponent_t *) data;
1007 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1008 printf("Adding spare\n");
1009 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1010 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
1011 return(retcode);
1012
1013 case RAIDFRAME_REMOVE_HOT_SPARE:
1014 return(retcode);
1015
1016 case RAIDFRAME_REBUILD_IN_PLACE:
1017
1018 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1019 /* Can't do this on a RAID 0!! */
1020 return(EINVAL);
1021 }
1022
1023 componentPtr = (RF_SingleComponent_t *) data;
1024 memcpy( &component, componentPtr,
1025 sizeof(RF_SingleComponent_t));
1026 row = component.row;
1027 column = component.column;
1028 printf("Rebuild: %d %d\n",row, column);
1029 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
1030 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
1031 return(EINVAL);
1032 }
1033 printf("Attempting a rebuild in place\n");
1034 s = splbio();
1035 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1036 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1037 splx(s);
1038 return(retcode);
1039
1040 /* issue a test-unit-ready through raidframe to the indicated
1041 * device */
1042 #if 0 /* XXX not supported yet (ever?) */
1043 case RAIDFRAME_TUR:
1044 /* debug only */
1045 retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
1046 return (retcode);
1047 #endif
1048 case RAIDFRAME_GET_INFO:
1049 {
1050 RF_Raid_t *raid = raidPtrs[unit];
1051 RF_DeviceConfig_t *cfg, **ucfgp;
1052 int i, j, d;
1053
1054 if (!raid->valid)
1055 return (ENODEV);
1056 ucfgp = (RF_DeviceConfig_t **) data;
1057 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1058 (RF_DeviceConfig_t *));
1059 if (cfg == NULL)
1060 return (ENOMEM);
1061 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1062 cfg->rows = raid->numRow;
1063 cfg->cols = raid->numCol;
1064 cfg->ndevs = raid->numRow * raid->numCol;
1065 if (cfg->ndevs >= RF_MAX_DISKS) {
1066 cfg->ndevs = 0;
1067 return (ENOMEM);
1068 }
1069 cfg->nspares = raid->numSpare;
1070 if (cfg->nspares >= RF_MAX_DISKS) {
1071 cfg->nspares = 0;
1072 return (ENOMEM);
1073 }
1074 cfg->maxqdepth = raid->maxQueueDepth;
1075 d = 0;
1076 for (i = 0; i < cfg->rows; i++) {
1077 for (j = 0; j < cfg->cols; j++) {
1078 cfg->devs[d] = raid->Disks[i][j];
1079 d++;
1080 }
1081 }
1082 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1083 cfg->spares[i] = raid->Disks[0][j];
1084 }
1085 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1086 sizeof(RF_DeviceConfig_t));
1087 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1088
1089 return (retcode);
1090 }
1091 break;
1092
1093 case RAIDFRAME_RESET_ACCTOTALS:
1094 {
1095 RF_Raid_t *raid = raidPtrs[unit];
1096
1097 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1098 return (0);
1099 }
1100 break;
1101
1102 case RAIDFRAME_GET_ACCTOTALS:
1103 {
1104 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1105 RF_Raid_t *raid = raidPtrs[unit];
1106
1107 *totals = raid->acc_totals;
1108 return (0);
1109 }
1110 break;
1111
1112 case RAIDFRAME_KEEP_ACCTOTALS:
1113 {
1114 RF_Raid_t *raid = raidPtrs[unit];
1115 int *keep = (int *) data;
1116
1117 raid->keep_acc_totals = *keep;
1118 return (0);
1119 }
1120 break;
1121
1122 case RAIDFRAME_GET_SIZE:
1123 *(int *) data = raidPtrs[unit]->totalSectors;
1124 return (0);
1125
1126 #define RAIDFRAME_RECON 1
1127 /* XXX The above should probably be set somewhere else!! GO */
1128 #if RAIDFRAME_RECON > 0
1129
1130 /* fail a disk & optionally start reconstruction */
1131 case RAIDFRAME_FAIL_DISK:
1132
1133 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1134 /* Can't do this on a RAID 0!! */
1135 return(EINVAL);
1136 }
1137
1138 rr = (struct rf_recon_req *) data;
1139
1140 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1141 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1142 return (EINVAL);
1143
1144 printf("raid%d: Failing the disk: row: %d col: %d\n",
1145 unit, rr->row, rr->col);
1146
1147 /* make a copy of the recon request so that we don't rely on
1148 * the user's buffer */
1149 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1150 bcopy(rr, rrcopy, sizeof(*rr));
1151 rrcopy->raidPtr = (void *) raidPtrs[unit];
1152
1153 LOCK_RECON_Q_MUTEX();
1154 rrcopy->next = recon_queue;
1155 recon_queue = rrcopy;
1156 wakeup(&recon_queue);
1157 UNLOCK_RECON_Q_MUTEX();
1158
1159 return (0);
1160
1161 /* invoke a copyback operation after recon on whatever disk
1162 * needs it, if any */
1163 case RAIDFRAME_COPYBACK:
1164
1165 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1166 /* This makes no sense on a RAID 0!! */
1167 return(EINVAL);
1168 }
1169
1170 /* borrow the current thread to get this done */
1171 raidPtrs[unit]->proc = p; /* ICK.. but needed :-p GO */
1172 s = splbio();
1173 rf_CopybackReconstructedData(raidPtrs[unit]);
1174 splx(s);
1175 return (0);
1176
1177 /* return the percentage completion of reconstruction */
1178 case RAIDFRAME_CHECKRECON:
1179 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1180 /* This makes no sense on a RAID 0 */
1181 return(EINVAL);
1182 }
1183
1184 row = *(int *) data;
1185 if (row < 0 || row >= raidPtrs[unit]->numRow)
1186 return (EINVAL);
1187 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1188 *(int *) data = 100;
1189 else
1190 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1191 return (0);
1192
1193 /* the sparetable daemon calls this to wait for the kernel to
1194 * need a spare table. this ioctl does not return until a
1195 * spare table is needed. XXX -- calling mpsleep here in the
1196 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1197 * -- I should either compute the spare table in the kernel,
1198 * or have a different -- XXX XXX -- interface (a different
1199 * character device) for delivering the table -- XXX */
1200 #if 0
1201 case RAIDFRAME_SPARET_WAIT:
1202 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1203 while (!rf_sparet_wait_queue)
1204 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1205 waitreq = rf_sparet_wait_queue;
1206 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1207 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1208
1209 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1210
1211 RF_Free(waitreq, sizeof(*waitreq));
1212 return (0);
1213
1214
1215 /* wakes up a process waiting on SPARET_WAIT and puts an error
1216 * code in it that will cause the dameon to exit */
1217 case RAIDFRAME_ABORT_SPARET_WAIT:
1218 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1219 waitreq->fcol = -1;
1220 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1221 waitreq->next = rf_sparet_wait_queue;
1222 rf_sparet_wait_queue = waitreq;
1223 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1224 wakeup(&rf_sparet_wait_queue);
1225 return (0);
1226
1227 /* used by the spare table daemon to deliver a spare table
1228 * into the kernel */
1229 case RAIDFRAME_SEND_SPARET:
1230
1231 /* install the spare table */
1232 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1233
1234 /* respond to the requestor. the return status of the spare
1235 * table installation is passed in the "fcol" field */
1236 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1237 waitreq->fcol = retcode;
1238 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1239 waitreq->next = rf_sparet_resp_queue;
1240 rf_sparet_resp_queue = waitreq;
1241 wakeup(&rf_sparet_resp_queue);
1242 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1243
1244 return (retcode);
1245 #endif
1246
1247
1248 #endif /* RAIDFRAME_RECON > 0 */
1249
1250 default:
1251 break; /* fall through to the os-specific code below */
1252
1253 }
1254
1255 if (!raidPtrs[unit]->valid)
1256 return (EINVAL);
1257
1258 /*
1259 * Add support for "regular" device ioctls here.
1260 */
1261
1262 switch (cmd) {
1263 case DIOCGDINFO:
1264 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1265 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1266 break;
1267
1268 case DIOCGPART:
1269 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1270 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1271 ((struct partinfo *) data)->part =
1272 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1273 break;
1274
1275 case DIOCWDINFO:
1276 db1_printf(("DIOCWDINFO\n"));
1277 case DIOCSDINFO:
1278 db1_printf(("DIOCSDINFO\n"));
1279 if ((error = raidlock(rs)) != 0)
1280 return (error);
1281
1282 rs->sc_flags |= RAIDF_LABELLING;
1283
1284 error = setdisklabel(rs->sc_dkdev.dk_label,
1285 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1286 if (error == 0) {
1287 if (cmd == DIOCWDINFO)
1288 error = writedisklabel(RAIDLABELDEV(dev),
1289 raidstrategy, rs->sc_dkdev.dk_label,
1290 rs->sc_dkdev.dk_cpulabel);
1291 }
1292 rs->sc_flags &= ~RAIDF_LABELLING;
1293
1294 raidunlock(rs);
1295
1296 if (error)
1297 return (error);
1298 break;
1299
1300 case DIOCWLABEL:
1301 db1_printf(("DIOCWLABEL\n"));
1302 if (*(int *) data != 0)
1303 rs->sc_flags |= RAIDF_WLABEL;
1304 else
1305 rs->sc_flags &= ~RAIDF_WLABEL;
1306 break;
1307
1308 case DIOCGDEFLABEL:
1309 db1_printf(("DIOCGDEFLABEL\n"));
1310 raidgetdefaultlabel(raidPtrs[unit], rs,
1311 (struct disklabel *) data);
1312 break;
1313
1314 default:
1315 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1316 }
1317 return (retcode);
1318
1319 }
1320
1321
1322 /* raidinit -- complete the rest of the initialization for the
1323 RAIDframe device. */
1324
1325
1326 static int
1327 raidinit(dev, raidPtr, unit)
1328 dev_t dev;
1329 RF_Raid_t *raidPtr;
1330 int unit;
1331 {
1332 int retcode;
1333 /* int ix; */
1334 /* struct raidbuf *raidbp; */
1335 struct raid_softc *rs;
1336
1337 retcode = 0;
1338
1339 rs = &raid_softc[unit];
1340 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1341 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1342
1343
1344 /* XXX should check return code first... */
1345 rs->sc_flags |= RAIDF_INITED;
1346
1347 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1348
1349 rs->sc_dkdev.dk_name = rs->sc_xname;
1350
1351 /* disk_attach actually creates space for the CPU disklabel, among
1352 * other things, so it's critical to call this *BEFORE* we try putzing
1353 * with disklabels. */
1354
1355 disk_attach(&rs->sc_dkdev);
1356
1357 /* XXX There may be a weird interaction here between this, and
1358 * protectedSectors, as used in RAIDframe. */
1359
1360 rs->sc_size = raidPtr->totalSectors;
1361 rs->sc_dev = dev;
1362
1363 return (retcode);
1364 }
1365
1366 /*
1367 * This kernel thread never exits. It is created once, and persists
1368 * until the system reboots.
1369 */
1370
1371 void
1372 rf_ReconKernelThread()
1373 {
1374 struct rf_recon_req *req;
1375 int s;
1376
1377 /* XXX not sure what spl() level we should be at here... probably
1378 * splbio() */
1379 s = splbio();
1380
1381 while (1) {
1382 /* grab the next reconstruction request from the queue */
1383 LOCK_RECON_Q_MUTEX();
1384 while (!recon_queue) {
1385 UNLOCK_RECON_Q_MUTEX();
1386 tsleep(&recon_queue, PRIBIO,
1387 "raidframe recon", 0);
1388 LOCK_RECON_Q_MUTEX();
1389 }
1390 req = recon_queue;
1391 recon_queue = recon_queue->next;
1392 UNLOCK_RECON_Q_MUTEX();
1393
1394 /*
1395 * If flags specifies that we should start recon, this call
1396 * will not return until reconstruction completes, fails,
1397 * or is aborted.
1398 */
1399 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
1400 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
1401
1402 RF_Free(req, sizeof(*req));
1403 }
1404 }
/* wake up the daemon & tell it to get us a spare table.
 * Blocks until the daemon posts a response, then returns the status
 * the daemon placed in the "fcol" field (see RAIDFRAME_SEND_SPARET).
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	/* post our request where RAIDFRAME_SPARET_WAIT will find it */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex.
	 * NOTE(review): tsleep() does NOT release rf_sparet_wait_mutex
	 * the way the old mpsleep() call did -- if RF_LOCK_MUTEX is a
	 * real blocking lock this sleeps while holding it; verify. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		       "raidframe getsparetable", 0);
#if 0
		mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
			(void *) simple_lock_addr(rf_sparet_wait_mutex),
			MS_LOCK_SIMPLE);
#endif
	}
	/* pop the daemon's response off the response queue; note that
	 * "req" is reused and now points at the response entry */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Drains the softc's buf_queue while the array still has openings,
 * handing each buffer to rf_DoAccess() as a non-blocking request.
 * Buffers with out-of-range or unaligned transfers are completed
 * immediately with B_ERROR set.
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	struct buf *dp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Check to see if we're at the limit... */
	RF_LOCK_MUTEX(raidPtr->mutex);
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		dp = &rs->buf_queue;
		bp = dp->b_actf;
		if (bp == NULL) {
			/* nothing more to do */
			return;
		}

		/* update structures */
		/* unlink bp from the head of the b_actf/b_actb list,
		 * fixing up the back-pointer of the new head (or of the
		 * queue header itself if the list is now empty) */
		dp = bp->b_actf;
		if (dp != NULL) {
			dp->b_actb = bp->b_actb;
		} else {
			rs->buf_queue.b_actb = bp->b_actb;
		}
		*bp->b_actb = dp;

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		/* pb is 1 if the byte count doesn't divide evenly into
		 * sectors, so "sum" is the exclusive end of the request */
		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/* reject requests past the end of the array; the "<"
		 * comparisons catch arithmetic wrap-around as well */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that aren't a whole number of sectors */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));

		/* claim one of the array's openings for this request */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* NOTE(review): retcode from rf_DoAccess() is assigned
		 * but never examined here -- verify that errors are
		 * reported through the bp completion path instead */
		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
		    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
		    do_async, raid_addr, num_blocks,
		    bp->b_un.b_addr, bp, NULL, NULL,
		    RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);


		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1569
1570
1571
1572
/* invoke an I/O from kernel mode. Disk queue should be locked upon entry.
 * Wraps the request in a raidbuf, initializes the embedded struct buf,
 * and hands it to the component's strategy routine via VOP_STRATEGY();
 * KernelWakeupFunc() is the biodone() callback.  Always returns 0. */

int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	/* remember the queue so KernelWakeupFunc() can find it again */
	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	/* clear any stale error state left in the caller's buffer */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* XXX plain printf with doubled parens -- presumably this
		 * was meant to be db1_printf; verify before changing */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* complete the NOP immediately via the normal callback */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* set up the shadow buf aimed at the component device */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must bump the vnode's output counter */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.  Runs at biodone() time: propagates status from the
   shadow buf back to the original buf, marks the component failed on
   I/O error, releases the raidbuf, and completes the RAIDframe request.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* vbp is really the raidbuf whose first member is the shadow buf */
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	/* the original buffer this I/O was issued on behalf of */
	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate an error on the component I/O back to the original bp */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* account the physical I/O time in the trace record, if any */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* return the raidbuf to the unit's pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* tell RAIDframe the I/O is done and fire its completion callback */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
1788
1789
1790
1791 /*
1792 * initialize a buf structure for doing an I/O in the kernel.
1793 */
1794 static void
1795 InitBP(
1796 struct buf * bp,
1797 struct vnode * b_vp,
1798 unsigned rw_flag,
1799 dev_t dev,
1800 RF_SectorNum_t startSect,
1801 RF_SectorCount_t numSect,
1802 caddr_t buf,
1803 void (*cbFunc) (struct buf *),
1804 void *cbArg,
1805 int logBytesPerSector,
1806 struct proc * b_proc)
1807 {
1808 /* bp->b_flags = B_PHYS | rw_flag; */
1809 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1810 bp->b_bcount = numSect << logBytesPerSector;
1811 bp->b_bufsize = bp->b_bcount;
1812 bp->b_error = 0;
1813 bp->b_dev = dev;
1814 db1_printf(("bp->b_dev is %d\n", dev));
1815 bp->b_un.b_addr = buf;
1816 #if 0
1817 db1_printf(("bp->b_data=0x%x\n", bp->b_data));
1818 #endif
1819
1820 bp->b_blkno = startSect;
1821 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1822 db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
1823 if (bp->b_bcount == 0) {
1824 panic("bp->b_bcount is zero in InitBP!!\n");
1825 }
1826 bp->b_proc = b_proc;
1827 bp->b_iodone = cbFunc;
1828 bp->b_vp = b_vp;
1829
1830 }
1831 /* Extras... */
1832
/*
 * rpcc: apparently meant to read a CPU cycle counter (cf. the Alpha
 * "rpcc" instruction) -- unimplemented on this port, so it always
 * reports zero.
 */
unsigned int
rpcc()
{
	return (0);
}
#if 0
/*
 * Disabled stub version of rf_GetSpareTableFromDaemon(); the live
 * implementation appears earlier in this file.  Dead code kept under
 * "#if 0" -- candidate for removal.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode = 1;
	printf("This is supposed to do something useful!!\n");	/* XXX */

	return (retcode);

}
#endif
1853
1854 static void
1855 raidgetdefaultlabel(raidPtr, rs, lp)
1856 RF_Raid_t *raidPtr;
1857 struct raid_softc *rs;
1858 struct disklabel *lp;
1859 {
1860 db1_printf(("Building a default label...\n"));
1861 bzero(lp, sizeof(*lp));
1862
1863 /* fabricate a label... */
1864 lp->d_secperunit = raidPtr->totalSectors;
1865 lp->d_secsize = raidPtr->bytesPerSector;
1866 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1867 lp->d_ntracks = 1;
1868 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1869 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1870
1871 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1872 lp->d_type = DTYPE_RAID;
1873 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1874 lp->d_rpm = 3600;
1875 lp->d_interleave = 1;
1876 lp->d_flags = 0;
1877
1878 lp->d_partitions[RAW_PART].p_offset = 0;
1879 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1880 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1881 lp->d_npartitions = RAW_PART + 1;
1882
1883 lp->d_magic = DISKMAGIC;
1884 lp->d_magic2 = DISKMAGIC;
1885 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1886
1887 }
1888 /*
1889 * Read the disklabel from the raid device. If one is not present, fake one
1890 * up.
1891 */
1892 static void
1893 raidgetdisklabel(dev)
1894 dev_t dev;
1895 {
1896 int unit = raidunit(dev);
1897 struct raid_softc *rs = &raid_softc[unit];
1898 char *errstring;
1899 struct disklabel *lp = rs->sc_dkdev.dk_label;
1900 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1901 RF_Raid_t *raidPtr;
1902
1903 db1_printf(("Getting the disklabel...\n"));
1904
1905 bzero(clp, sizeof(*clp));
1906
1907 raidPtr = raidPtrs[unit];
1908
1909 raidgetdefaultlabel(raidPtr, rs, lp);
1910
1911 /*
1912 * Call the generic disklabel extraction routine.
1913 */
1914 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1915 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1916 if (errstring)
1917 raidmakedisklabel(rs);
1918 else {
1919 int i;
1920 struct partition *pp;
1921
1922 /*
1923 * Sanity check whether the found disklabel is valid.
1924 *
1925 * This is necessary since total size of the raid device
1926 * may vary when an interleave is changed even though exactly
1927 * same componets are used, and old disklabel may used
1928 * if that is found.
1929 */
1930 if (lp->d_secperunit != rs->sc_size)
1931 printf("WARNING: %s: "
1932 "total sector size in disklabel (%d) != "
1933 "the size of raid (%ld)\n", rs->sc_xname,
1934 lp->d_secperunit, (long) rs->sc_size);
1935 for (i = 0; i < lp->d_npartitions; i++) {
1936 pp = &lp->d_partitions[i];
1937 if (pp->p_offset + pp->p_size > rs->sc_size)
1938 printf("WARNING: %s: end of partition `%c' "
1939 "exceeds the size of raid (%ld)\n",
1940 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1941 }
1942 }
1943
1944 }
1945 /*
1946 * Take care of things one might want to take care of in the event
1947 * that a disklabel isn't present.
1948 */
1949 static void
1950 raidmakedisklabel(rs)
1951 struct raid_softc *rs;
1952 {
1953 struct disklabel *lp = rs->sc_dkdev.dk_label;
1954 db1_printf(("Making a label..\n"));
1955
1956 /*
1957 * For historical reasons, if there's no disklabel present
1958 * the raw partition must be marked FS_BSDFFS.
1959 */
1960
1961 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1962
1963 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1964
1965 lp->d_checksum = dkcksum(lp);
1966 }
1967 /*
1968 * Lookup the provided name in the filesystem. If the file exists,
1969 * is a valid block device, and isn't being used by anyone else,
1970 * set *vpp to the file's vnode.
1971 * You'll find the original of this in ccd.c
1972 */
1973 int
1974 raidlookup(path, p, vpp)
1975 char *path;
1976 struct proc *p;
1977 struct vnode **vpp; /* result */
1978 {
1979 struct nameidata nd;
1980 struct vnode *vp;
1981 struct vattr va;
1982 int error;
1983
1984 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1985 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1986 #ifdef DEBUG
1987 printf("RAIDframe: vn_open returned %d\n", error);
1988 #endif
1989 return (error);
1990 }
1991 vp = nd.ni_vp;
1992 if (vp->v_usecount > 1) {
1993 VOP_UNLOCK(vp, 0);
1994 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1995 return (EBUSY);
1996 }
1997 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1998 VOP_UNLOCK(vp, 0);
1999 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2000 return (error);
2001 }
2002 /* XXX: eventually we should handle VREG, too. */
2003 if (va.va_type != VBLK) {
2004 VOP_UNLOCK(vp, 0);
2005 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2006 return (ENOTBLK);
2007 }
2008 VOP_UNLOCK(vp, 0);
2009 *vpp = vp;
2010 return (0);
2011 }
2012 /*
2013 * Wait interruptibly for an exclusive lock.
2014 *
2015 * XXX
2016 * Several drivers do this; it should be abstracted and made MP-safe.
2017 * (Hmm... where have we seen this warning before :-> GO )
2018 */
2019 static int
2020 raidlock(rs)
2021 struct raid_softc *rs;
2022 {
2023 int error;
2024
2025 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2026 rs->sc_flags |= RAIDF_WANTED;
2027 if ((error =
2028 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2029 return (error);
2030 }
2031 rs->sc_flags |= RAIDF_LOCKED;
2032 return (0);
2033 }
2034 /*
2035 * Unlock and wake up any waiters.
2036 */
2037 static void
2038 raidunlock(rs)
2039 struct raid_softc *rs;
2040 {
2041
2042 rs->sc_flags &= ~RAIDF_LOCKED;
2043 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2044 rs->sc_flags &= ~RAIDF_WANTED;
2045 wakeup(rs);
2046 }
2047 }
2048
2049
2050 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2051 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2052
2053 int
2054 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2055 {
2056 RF_ComponentLabel_t component_label;
2057 raidread_component_label(dev, b_vp, &component_label);
2058 component_label.mod_counter = mod_counter;
2059 component_label.clean = RF_RAID_CLEAN;
2060 raidwrite_component_label(dev, b_vp, &component_label);
2061 return(0);
2062 }
2063
2064
2065 int
2066 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2067 {
2068 RF_ComponentLabel_t component_label;
2069 raidread_component_label(dev, b_vp, &component_label);
2070 component_label.mod_counter = mod_counter;
2071 component_label.clean = RF_RAID_DIRTY;
2072 raidwrite_component_label(dev, b_vp, &component_label);
2073 return(0);
2074 }
2075
/* ARGSUSED */
/*
 * raidread_component_label: read the RAIDframe component label from the
 * fixed on-disk location (RF_COMPONENT_INFO_OFFSET) of the component
 * identified by dev, copying it into *component_label.  Returns the
 * biowait() status (0 on success).  b_vp is unused here.
 */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_resid is normally in bytes, but this assigns a
	 * sector count -- verify whether anything downstream cares */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* issue the read directly through the block device's strategy */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		/* label lives at the front of the on-disk record */
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* mark the scratch buffer stale and release it */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
/* ARGSUSED */
/*
 * Write a RAIDframe component label to the reserved label area of the
 * component identified by dev.  The label is written as a single
 * RF_COMPONENT_INFO_SIZE transfer at RF_COMPONENT_INFO_OFFSET, zero-padded
 * beyond sizeof(RF_ComponentLabel_t).
 *
 * dev             -- raw device of the component to write
 * b_vp            -- vnode of the component (unused here; hence ARGSUSED)
 * component_label -- label contents to be written out
 *
 * Returns 0 on success, otherwise the error code from biowait().
 */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	/* NOTE(review): b_resid is set in units of DEV_BSIZE blocks here,
	   not bytes, mirroring the read side -- confirm drivers expect this */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero the whole buffer first so the area past the label
	   structure is written as zeros rather than stale memory */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* hand the buffer to the device's strategy routine and wait
	   for the write to complete */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* mark the buffer invalid/aged so the cached copy is discarded */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2160
2161 void
2162 rf_markalldirty( raidPtr )
2163 RF_Raid_t *raidPtr;
2164 {
2165 RF_ComponentLabel_t c_label;
2166 int r,c;
2167
2168 raidPtr->mod_counter++;
2169 for (r = 0; r < raidPtr->numRow; r++) {
2170 for (c = 0; c < raidPtr->numCol; c++) {
2171 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2172 raidread_component_label(
2173 raidPtr->Disks[r][c].dev,
2174 raidPtr->raid_cinfo[r][c].ci_vp,
2175 &c_label);
2176 if (c_label.status == rf_ds_spared) {
2177 /* XXX do something special...
2178 but whatever you do, don't
2179 try to access it!! */
2180 } else {
2181 #if 0
2182 c_label.status =
2183 raidPtr->Disks[r][c].status;
2184 raidwrite_component_label(
2185 raidPtr->Disks[r][c].dev,
2186 raidPtr->raid_cinfo[r][c].ci_vp,
2187 &c_label);
2188 #endif
2189 raidmarkdirty(
2190 raidPtr->Disks[r][c].dev,
2191 raidPtr->raid_cinfo[r][c].ci_vp,
2192 raidPtr->mod_counter);
2193 }
2194 }
2195 }
2196 }
2197 /* printf("Component labels marked dirty.\n"); */
2198 #if 0
2199 for( c = 0; c < raidPtr->numSpare ; c++) {
2200 sparecol = raidPtr->numCol + c;
2201 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2202 /*
2203
2204 XXX this is where we get fancy and map this spare
2205 into it's correct spot in the array.
2206
2207 */
2208 /*
2209
2210 we claim this disk is "optimal" if it's
2211 rf_ds_used_spare, as that means it should be
2212 directly substitutable for the disk it replaced.
2213 We note that too...
2214
2215 */
2216
2217 for(i=0;i<raidPtr->numRow;i++) {
2218 for(j=0;j<raidPtr->numCol;j++) {
2219 if ((raidPtr->Disks[i][j].spareRow ==
2220 r) &&
2221 (raidPtr->Disks[i][j].spareCol ==
2222 sparecol)) {
2223 srow = r;
2224 scol = sparecol;
2225 break;
2226 }
2227 }
2228 }
2229
2230 raidread_component_label(
2231 raidPtr->Disks[r][sparecol].dev,
2232 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2233 &c_label);
2234 /* make sure status is noted */
2235 c_label.version = RF_COMPONENT_LABEL_VERSION;
2236 c_label.mod_counter = raidPtr->mod_counter;
2237 c_label.serial_number = raidPtr->serial_number;
2238 c_label.row = srow;
2239 c_label.column = scol;
2240 c_label.num_rows = raidPtr->numRow;
2241 c_label.num_columns = raidPtr->numCol;
2242 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2243 c_label.status = rf_ds_optimal;
2244 raidwrite_component_label(
2245 raidPtr->Disks[r][sparecol].dev,
2246 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2247 &c_label);
2248 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2249 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2250 }
2251 }
2252
2253 #endif
2254 }
2255
2256
2257 void
2258 rf_update_component_labels( raidPtr )
2259 RF_Raid_t *raidPtr;
2260 {
2261 RF_ComponentLabel_t c_label;
2262 int sparecol;
2263 int r,c;
2264 int i,j;
2265 int srow, scol;
2266
2267 srow = -1;
2268 scol = -1;
2269
2270 /* XXX should do extra checks to make sure things really are clean,
2271 rather than blindly setting the clean bit... */
2272
2273 raidPtr->mod_counter++;
2274
2275 for (r = 0; r < raidPtr->numRow; r++) {
2276 for (c = 0; c < raidPtr->numCol; c++) {
2277 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2278 raidread_component_label(
2279 raidPtr->Disks[r][c].dev,
2280 raidPtr->raid_cinfo[r][c].ci_vp,
2281 &c_label);
2282 /* make sure status is noted */
2283 c_label.status = rf_ds_optimal;
2284 raidwrite_component_label(
2285 raidPtr->Disks[r][c].dev,
2286 raidPtr->raid_cinfo[r][c].ci_vp,
2287 &c_label);
2288 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2289 raidmarkclean(
2290 raidPtr->Disks[r][c].dev,
2291 raidPtr->raid_cinfo[r][c].ci_vp,
2292 raidPtr->mod_counter);
2293 }
2294 }
2295 /* else we don't touch it.. */
2296 #if 0
2297 else if (raidPtr->Disks[r][c].status !=
2298 rf_ds_failed) {
2299 raidread_component_label(
2300 raidPtr->Disks[r][c].dev,
2301 raidPtr->raid_cinfo[r][c].ci_vp,
2302 &c_label);
2303 /* make sure status is noted */
2304 c_label.status =
2305 raidPtr->Disks[r][c].status;
2306 raidwrite_component_label(
2307 raidPtr->Disks[r][c].dev,
2308 raidPtr->raid_cinfo[r][c].ci_vp,
2309 &c_label);
2310 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2311 raidmarkclean(
2312 raidPtr->Disks[r][c].dev,
2313 raidPtr->raid_cinfo[r][c].ci_vp,
2314 raidPtr->mod_counter);
2315 }
2316 }
2317 #endif
2318 }
2319 }
2320
2321 for( c = 0; c < raidPtr->numSpare ; c++) {
2322 sparecol = raidPtr->numCol + c;
2323 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2324 /*
2325
2326 we claim this disk is "optimal" if it's
2327 rf_ds_used_spare, as that means it should be
2328 directly substitutable for the disk it replaced.
2329 We note that too...
2330
2331 */
2332
2333 for(i=0;i<raidPtr->numRow;i++) {
2334 for(j=0;j<raidPtr->numCol;j++) {
2335 if ((raidPtr->Disks[i][j].spareRow ==
2336 0) &&
2337 (raidPtr->Disks[i][j].spareCol ==
2338 sparecol)) {
2339 srow = i;
2340 scol = j;
2341 break;
2342 }
2343 }
2344 }
2345
2346 raidread_component_label(
2347 raidPtr->Disks[0][sparecol].dev,
2348 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2349 &c_label);
2350 /* make sure status is noted */
2351 c_label.version = RF_COMPONENT_LABEL_VERSION;
2352 c_label.mod_counter = raidPtr->mod_counter;
2353 c_label.serial_number = raidPtr->serial_number;
2354 c_label.row = srow;
2355 c_label.column = scol;
2356 c_label.num_rows = raidPtr->numRow;
2357 c_label.num_columns = raidPtr->numCol;
2358 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2359 c_label.status = rf_ds_optimal;
2360 raidwrite_component_label(
2361 raidPtr->Disks[0][sparecol].dev,
2362 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2363 &c_label);
2364 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2365 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2366 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2367 raidPtr->mod_counter);
2368 }
2369 }
2370 }
2371 /* printf("Component labels updated\n"); */
2372 }
2373