rf_netbsdkintf.c revision 1.20 1 /* $NetBSD: rf_netbsdkintf.c,v 1.20 1999/07/08 00:45:23 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
154 int rf_kdebug_level = 0;
155
156 #define RFK_BOOT_NONE 0
157 #define RFK_BOOT_GOOD 1
158 #define RFK_BOOT_BAD 2
159 static int rf_kbooted = RFK_BOOT_NONE;
160
161 #ifdef DEBUG
162 #define db0_printf(a) printf a
163 #define db_printf(a) if (rf_kdebug_level > 0) printf a
164 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
165 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
166 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
167 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
168 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
169 #else /* DEBUG */
170 #define db0_printf(a) printf a
171 #define db1_printf(a) { }
172 #define db2_printf(a) { }
173 #define db3_printf(a) { }
174 #define db4_printf(a) { }
175 #define db5_printf(a) { }
176 #endif /* DEBUG */
177
178 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
179
180 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
181
182 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
183 * spare table */
184 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
185 * installation process */
186
187 static struct rf_recon_req *recon_queue = NULL; /* used to communicate
188 * reconstruction
189 * requests */
190
191
192 decl_simple_lock_data(, recon_queue_mutex)
193 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
194 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
195
196 /* prototypes */
197 static void KernelWakeupFunc(struct buf * bp);
198 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
199 dev_t dev, RF_SectorNum_t startSect,
200 RF_SectorCount_t numSect, caddr_t buf,
201 void (*cbFunc) (struct buf *), void *cbArg,
202 int logBytesPerSector, struct proc * b_proc);
203
204 #define Dprintf0(s) if (rf_queueDebug) \
205 rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
206 #define Dprintf1(s,a) if (rf_queueDebug) \
207 rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
208 #define Dprintf2(s,a,b) if (rf_queueDebug) \
209 rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
210 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
211 rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
212
213 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
214 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
215
216 void raidattach __P((int));
217 int raidsize __P((dev_t));
218
219 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
220 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
221 static int raidinit __P((dev_t, RF_Raid_t *, int));
222
223 int raidopen __P((dev_t, int, int, struct proc *));
224 int raidclose __P((dev_t, int, int, struct proc *));
225 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
226 int raidwrite __P((dev_t, struct uio *, int));
227 int raidread __P((dev_t, struct uio *, int));
228 void raidstrategy __P((struct buf *));
229 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
230
231 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
232 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
233 void rf_update_component_labels( RF_Raid_t *);
234 /*
235 * Pilfered from ccd.c
236 */
237
/*
 * Per-component I/O wrapper: one raidbuf is allocated (from sc_cbufpool)
 * for each physical transfer issued on behalf of a RAIDframe request.
 * Same technique as ccd.c's ccdbuf (see "Pilfered from ccd.c" above).
 */
238 struct raidbuf {
239 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
240 struct buf *rf_obp; /* ptr. to original I/O buf */
241 int rf_flags; /* misc. flags */
242 RF_DiskQueueData_t *req;/* the request that this was part of.. */
243 };
244
245
246 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
247 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
248
249 /* XXX Not sure if the following should be replacing the raidPtrs above,
250 or if it should be used in conjunction with that... */
251
/*
 * Per-unit software state for a RAID pseudo-device; raid_softc[] is
 * indexed by unit number (see raidunit()).  Protected by the
 * RAIDF_LOCKED/RAIDF_WANTED flags via raidlock()/raidunlock().
 */
252 struct raid_softc {
253 int sc_flags; /* flags */
254 int sc_cflags; /* configuration flags */
255 size_t sc_size; /* size of the raid device */
256 dev_t sc_dev; /* our device.. */
257 char sc_xname[20]; /* XXX external name */
258 struct disk sc_dkdev; /* generic disk device info */
259 struct pool sc_cbufpool; /* component buffer pool */
260 };
261 /* sc_flags */
262 #define RAIDF_INITED 0x01 /* unit has been initialized */
263 #define RAIDF_WLABEL 0x02 /* label area is writable */
264 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
265 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
266 #define RAIDF_LOCKED 0x80 /* unit is locked */
267
268 #define raidunit(x) DISKUNIT(x)
269 static int numraid = 0;
270
271 /*
272 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
273 * Be aware that large numbers can allow the driver to consume a lot of
274 * kernel memory, especially on writes...
275 */
276
277 #ifndef RAIDOUTSTANDING
278 #define RAIDOUTSTANDING 10
279 #endif
280
281 #define RAIDLABELDEV(dev) \
282 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
283
284 /* declared here, and made public, for the benefit of KVM stuff.. */
285 struct raid_softc *raid_softc;
286
287 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
288 struct disklabel *));
289 static void raidgetdisklabel __P((dev_t));
290 static void raidmakedisklabel __P((struct raid_softc *));
291
292 static int raidlock __P((struct raid_softc *));
293 static void raidunlock __P((struct raid_softc *));
294 int raidlookup __P((char *, struct proc * p, struct vnode **));
295
296 static void rf_markalldirty __P((RF_Raid_t *));
297
298 void
299 raidattach(num)
300 int num;
301 {
302 int raidID;
303 int i, rc;
304
305 #ifdef DEBUG
306 printf("raidattach: Asked for %d units\n", num);
307 #endif
308
309 if (num <= 0) {
310 #ifdef DIAGNOSTIC
311 panic("raidattach: count <= 0");
312 #endif
313 return;
314 }
315 /* This is where all the initialization stuff gets done. */
316
317 /* Make some space for requested number of units... */
318
319 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
320 if (raidPtrs == NULL) {
321 panic("raidPtrs is NULL!!\n");
322 }
323
324 rc = rf_mutex_init(&rf_sparet_wait_mutex);
325 if (rc) {
326 RF_PANIC();
327 }
328
329 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
330 recon_queue = NULL;
331
332 for (i = 0; i < numraid; i++)
333 raidPtrs[i] = NULL;
334 rc = rf_BootRaidframe();
335 if (rc == 0)
336 printf("Kernelized RAIDframe activated\n");
337 else
338 panic("Serious error booting RAID!!\n");
339
340 rf_kbooted = RFK_BOOT_GOOD;
341
342 /* put together some datastructures like the CCD device does.. This
343 * lets us lock the device and what-not when it gets opened. */
344
345 raid_softc = (struct raid_softc *)
346 malloc(num * sizeof(struct raid_softc),
347 M_RAIDFRAME, M_NOWAIT);
348 if (raid_softc == NULL) {
349 printf("WARNING: no memory for RAIDframe driver\n");
350 return;
351 }
352 numraid = num;
353 bzero(raid_softc, num * sizeof(struct raid_softc));
354
355 for (raidID = 0; raidID < num; raidID++) {
356 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
357 (RF_Raid_t *));
358 if (raidPtrs[raidID] == NULL) {
359 printf("raidPtrs[%d] is NULL\n", raidID);
360 }
361 }
362 }
363
364
365 int
366 raidsize(dev)
367 dev_t dev;
368 {
369 struct raid_softc *rs;
370 struct disklabel *lp;
371 int part, unit, omask, size;
372
373 unit = raidunit(dev);
374 if (unit >= numraid)
375 return (-1);
376 rs = &raid_softc[unit];
377
378 if ((rs->sc_flags & RAIDF_INITED) == 0)
379 return (-1);
380
381 part = DISKPART(dev);
382 omask = rs->sc_dkdev.dk_openmask & (1 << part);
383 lp = rs->sc_dkdev.dk_label;
384
385 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
386 return (-1);
387
388 if (lp->d_partitions[part].p_fstype != FS_SWAP)
389 size = -1;
390 else
391 size = lp->d_partitions[part].p_size *
392 (lp->d_secsize / DEV_BSIZE);
393
394 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
395 return (-1);
396
397 return (size);
398
399 }
400
401 int
402 raiddump(dev, blkno, va, size)
403 dev_t dev;
404 daddr_t blkno;
405 caddr_t va;
406 size_t size;
407 {
408 /* Not implemented. */
409 return ENXIO;
410 }
411 /* ARGSUSED */
412 int
413 raidopen(dev, flags, fmt, p)
414 dev_t dev;
415 int flags, fmt;
416 struct proc *p;
417 {
418 int unit = raidunit(dev);
419 struct raid_softc *rs;
420 struct disklabel *lp;
421 int part, pmask;
422 int error = 0;
423
424 if (unit >= numraid)
425 return (ENXIO);
426 rs = &raid_softc[unit];
427
428 if ((error = raidlock(rs)) != 0)
429 return (error);
430 lp = rs->sc_dkdev.dk_label;
431
432 part = DISKPART(dev);
433 pmask = (1 << part);
434
435 db1_printf(("Opening raid device number: %d partition: %d\n",
436 unit, part));
437
438
439 if ((rs->sc_flags & RAIDF_INITED) &&
440 (rs->sc_dkdev.dk_openmask == 0))
441 raidgetdisklabel(dev);
442
443 /* make sure that this partition exists */
444
445 if (part != RAW_PART) {
446 db1_printf(("Not a raw partition..\n"));
447 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
448 ((part >= lp->d_npartitions) ||
449 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
450 error = ENXIO;
451 raidunlock(rs);
452 db1_printf(("Bailing out...\n"));
453 return (error);
454 }
455 }
456 /* Prevent this unit from being unconfigured while open. */
457 switch (fmt) {
458 case S_IFCHR:
459 rs->sc_dkdev.dk_copenmask |= pmask;
460 break;
461
462 case S_IFBLK:
463 rs->sc_dkdev.dk_bopenmask |= pmask;
464 break;
465 }
466
467 if ((rs->sc_dkdev.dk_openmask == 0) &&
468 ((rs->sc_flags & RAIDF_INITED) != 0)) {
469 /* First one... mark things as dirty... Note that we *MUST*
470 have done a configure before this. I DO NOT WANT TO BE
471 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
472 THAT THEY BELONG TOGETHER!!!!! */
473 /* XXX should check to see if we're only open for reading
474 here... If so, we needn't do this, but then need some
475 other way of keeping track of what's happened.. */
476
477 rf_markalldirty( raidPtrs[unit] );
478 }
479
480
481 rs->sc_dkdev.dk_openmask =
482 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
483
484 raidunlock(rs);
485
486 return (error);
487
488
489 }
490 /* ARGSUSED */
491 int
492 raidclose(dev, flags, fmt, p)
493 dev_t dev;
494 int flags, fmt;
495 struct proc *p;
496 {
497 int unit = raidunit(dev);
498 struct raid_softc *rs;
499 int error = 0;
500 int part;
501
502 if (unit >= numraid)
503 return (ENXIO);
504 rs = &raid_softc[unit];
505
506 if ((error = raidlock(rs)) != 0)
507 return (error);
508
509 part = DISKPART(dev);
510
511 /* ...that much closer to allowing unconfiguration... */
512 switch (fmt) {
513 case S_IFCHR:
514 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
515 break;
516
517 case S_IFBLK:
518 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
519 break;
520 }
521 rs->sc_dkdev.dk_openmask =
522 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
523
524 if ((rs->sc_dkdev.dk_openmask == 0) &&
525 ((rs->sc_flags & RAIDF_INITED) != 0)) {
526 /* Last one... device is not unconfigured yet.
527 Device shutdown has taken care of setting the
528 clean bits if RAIDF_INITED is not set
529 mark things as clean... */
530 rf_update_component_labels( raidPtrs[unit] );
531 }
532
533 raidunlock(rs);
534 return (0);
535
536 }
537
538 void
539 raidstrategy(bp)
540 register struct buf *bp;
541 {
542 register int s;
543
544 unsigned int raidID = raidunit(bp->b_dev);
545 RF_Raid_t *raidPtr;
546 struct raid_softc *rs = &raid_softc[raidID];
547 struct disklabel *lp;
548 int wlabel;
549
550 #if 0
551 db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
552 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
553 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
554 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
555 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
556
557 if (bp->b_flags & B_READ)
558 db1_printf(("READ\n"));
559 else
560 db1_printf(("WRITE\n"));
561 #endif
562 if (rf_kbooted != RFK_BOOT_GOOD)
563 return;
564 if (raidID >= numraid || !raidPtrs[raidID]) {
565 bp->b_error = ENODEV;
566 bp->b_flags |= B_ERROR;
567 bp->b_resid = bp->b_bcount;
568 biodone(bp);
569 return;
570 }
571 raidPtr = raidPtrs[raidID];
572 if (!raidPtr->valid) {
573 bp->b_error = ENODEV;
574 bp->b_flags |= B_ERROR;
575 bp->b_resid = bp->b_bcount;
576 biodone(bp);
577 return;
578 }
579 if (bp->b_bcount == 0) {
580 db1_printf(("b_bcount is zero..\n"));
581 biodone(bp);
582 return;
583 }
584 lp = rs->sc_dkdev.dk_label;
585
586 /*
587 * Do bounds checking and adjust transfer. If there's an
588 * error, the bounds check will flag that for us.
589 */
590
591 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
592 if (DISKPART(bp->b_dev) != RAW_PART)
593 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
594 db1_printf(("Bounds check failed!!:%d %d\n",
595 (int) bp->b_blkno, (int) wlabel));
596 biodone(bp);
597 return;
598 }
599 s = splbio(); /* XXX Needed? */
600 db1_printf(("Beginning strategy...\n"));
601
602 bp->b_resid = 0;
603 bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
604 NULL, NULL, NULL);
605 if (bp->b_error) {
606 bp->b_flags |= B_ERROR;
607 db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
608 bp->b_error));
609 }
610 splx(s);
611 #if 0
612 db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
613 bp, bp->b_data,
614 (int) bp->b_bcount, (int) bp->b_resid));
615 #endif
616 }
617 /* ARGSUSED */
618 int
619 raidread(dev, uio, flags)
620 dev_t dev;
621 struct uio *uio;
622 int flags;
623 {
624 int unit = raidunit(dev);
625 struct raid_softc *rs;
626 int part;
627
628 if (unit >= numraid)
629 return (ENXIO);
630 rs = &raid_softc[unit];
631
632 if ((rs->sc_flags & RAIDF_INITED) == 0)
633 return (ENXIO);
634 part = DISKPART(dev);
635
636 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
637
638 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
639
640 }
641 /* ARGSUSED */
642 int
643 raidwrite(dev, uio, flags)
644 dev_t dev;
645 struct uio *uio;
646 int flags;
647 {
648 int unit = raidunit(dev);
649 struct raid_softc *rs;
650
651 if (unit >= numraid)
652 return (ENXIO);
653 rs = &raid_softc[unit];
654
655 if ((rs->sc_flags & RAIDF_INITED) == 0)
656 return (ENXIO);
657 db1_printf(("raidwrite\n"));
658 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
659
660 }
661
662 int
663 raidioctl(dev, cmd, data, flag, p)
664 dev_t dev;
665 u_long cmd;
666 caddr_t data;
667 int flag;
668 struct proc *p;
669 {
670 int unit = raidunit(dev);
671 int error = 0;
672 int part, pmask;
673 struct raid_softc *rs;
674 #if 0
675 int r, c;
676 #endif
677 /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
678
679 /* struct ccdbuf *cbp; */
680 /* struct raidbuf *raidbp; */
681 RF_Config_t *k_cfg, *u_cfg;
682 u_char *specific_buf;
683 int retcode = 0;
684 int row;
685 int column;
686 struct rf_recon_req *rrcopy, *rr;
687 RF_ComponentLabel_t *component_label;
688 RF_ComponentLabel_t ci_label;
689 RF_ComponentLabel_t **c_label_ptr;
690 RF_SingleComponent_t *sparePtr,*componentPtr;
691 RF_SingleComponent_t hot_spare;
692 RF_SingleComponent_t component;
693
694 if (unit >= numraid)
695 return (ENXIO);
696 rs = &raid_softc[unit];
697
698 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
699 (int) DISKPART(dev), (int) unit, (int) cmd));
700
701 /* Must be open for writes for these commands... */
702 switch (cmd) {
703 case DIOCSDINFO:
704 case DIOCWDINFO:
705 case DIOCWLABEL:
706 if ((flag & FWRITE) == 0)
707 return (EBADF);
708 }
709
710 /* Must be initialized for these... */
711 switch (cmd) {
712 case DIOCGDINFO:
713 case DIOCSDINFO:
714 case DIOCWDINFO:
715 case DIOCGPART:
716 case DIOCWLABEL:
717 case DIOCGDEFLABEL:
718 case RAIDFRAME_SHUTDOWN:
719 case RAIDFRAME_REWRITEPARITY:
720 case RAIDFRAME_GET_INFO:
721 case RAIDFRAME_RESET_ACCTOTALS:
722 case RAIDFRAME_GET_ACCTOTALS:
723 case RAIDFRAME_KEEP_ACCTOTALS:
724 case RAIDFRAME_GET_SIZE:
725 case RAIDFRAME_FAIL_DISK:
726 case RAIDFRAME_COPYBACK:
727 case RAIDFRAME_CHECKRECON:
728 case RAIDFRAME_GET_COMPONENT_LABEL:
729 case RAIDFRAME_SET_COMPONENT_LABEL:
730 case RAIDFRAME_ADD_HOT_SPARE:
731 case RAIDFRAME_REMOVE_HOT_SPARE:
732 case RAIDFRAME_INIT_LABELS:
733 case RAIDFRAME_REBUILD_IN_PLACE:
734 if ((rs->sc_flags & RAIDF_INITED) == 0)
735 return (ENXIO);
736 }
737
738 switch (cmd) {
739
740
741 /* configure the system */
742 case RAIDFRAME_CONFIGURE:
743
744 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
745 /* copy-in the configuration information */
746 /* data points to a pointer to the configuration structure */
747 u_cfg = *((RF_Config_t **) data);
748 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
749 if (k_cfg == NULL) {
750 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
751 return (ENOMEM);
752 }
753 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
754 sizeof(RF_Config_t));
755 if (retcode) {
756 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
757 retcode));
758 return (retcode);
759 }
760 /* allocate a buffer for the layout-specific data, and copy it
761 * in */
762 if (k_cfg->layoutSpecificSize) {
763 if (k_cfg->layoutSpecificSize > 10000) {
764 /* sanity check */
765 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
766 return (EINVAL);
767 }
768 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
769 (u_char *));
770 if (specific_buf == NULL) {
771 RF_Free(k_cfg, sizeof(RF_Config_t));
772 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
773 return (ENOMEM);
774 }
775 retcode = copyin(k_cfg->layoutSpecific,
776 (caddr_t) specific_buf,
777 k_cfg->layoutSpecificSize);
778 if (retcode) {
779 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
780 retcode));
781 return (retcode);
782 }
783 } else
784 specific_buf = NULL;
785 k_cfg->layoutSpecific = specific_buf;
786
787 /* should do some kind of sanity check on the configuration.
788 * Store the sum of all the bytes in the last byte? */
789
790 #if 0
791 db1_printf(("Considering configuring the system.:%d 0x%x\n",
792 unit, p));
793 #endif
794
795 /* We need the pointer to this a little deeper, so stash it
796 * here... */
797
798 raidPtrs[unit]->proc = p;
799
800 /* configure the system */
801
802 raidPtrs[unit]->raidid = unit;
803
804 retcode = rf_Configure(raidPtrs[unit], k_cfg);
805
806 /* allow this many simultaneous IO's to this RAID device */
807 raidPtrs[unit]->openings = RAIDOUTSTANDING;
808
809 if (retcode == 0) {
810 retcode = raidinit(dev, raidPtrs[unit], unit);
811 rf_markalldirty( raidPtrs[unit] );
812 }
813 /* free the buffers. No return code here. */
814 if (k_cfg->layoutSpecificSize) {
815 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
816 }
817 RF_Free(k_cfg, sizeof(RF_Config_t));
818
819 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
820 retcode));
821
822 return (retcode);
823
824 /* shutdown the system */
825 case RAIDFRAME_SHUTDOWN:
826
827 if ((error = raidlock(rs)) != 0)
828 return (error);
829
830 /*
831 * If somebody has a partition mounted, we shouldn't
832 * shutdown.
833 */
834
835 part = DISKPART(dev);
836 pmask = (1 << part);
837 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
838 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
839 (rs->sc_dkdev.dk_copenmask & pmask))) {
840 raidunlock(rs);
841 return (EBUSY);
842 }
843
844 if (rf_debugKernelAccess) {
845 printf("call shutdown\n");
846 }
847 raidPtrs[unit]->proc = p; /* XXX necessary evil */
848
849 retcode = rf_Shutdown(raidPtrs[unit]);
850
851 db1_printf(("Done main shutdown\n"));
852
853 pool_destroy(&rs->sc_cbufpool);
854 db1_printf(("Done freeing component buffer freelist\n"));
855
856 /* It's no longer initialized... */
857 rs->sc_flags &= ~RAIDF_INITED;
858
859 /* Detach the disk. */
860 disk_detach(&rs->sc_dkdev);
861
862 raidunlock(rs);
863
864 return (retcode);
865 case RAIDFRAME_GET_COMPONENT_LABEL:
866 c_label_ptr = (RF_ComponentLabel_t **) data;
867 /* need to read the component label for the disk indicated
868 by row,column in component_label
869 XXX need to sanity check these values!!!
870 */
871
872 /* For practice, let's get it directly fromdisk, rather
873 than from the in-core copy */
874 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
875 (RF_ComponentLabel_t *));
876 if (component_label == NULL)
877 return (ENOMEM);
878
879 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
880
881 retcode = copyin( *c_label_ptr, component_label,
882 sizeof(RF_ComponentLabel_t));
883
884 if (retcode) {
885 return(retcode);
886 }
887
888 row = component_label->row;
889 printf("Row: %d\n",row);
890 if (row > raidPtrs[unit]->numRow) {
891 row = 0; /* XXX */
892 }
893 column = component_label->column;
894 printf("Column: %d\n",column);
895 if (column > raidPtrs[unit]->numCol) {
896 column = 0; /* XXX */
897 }
898
899 raidread_component_label(
900 raidPtrs[unit]->Disks[row][column].dev,
901 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
902 component_label );
903
904 retcode = copyout((caddr_t) component_label,
905 (caddr_t) *c_label_ptr,
906 sizeof(RF_ComponentLabel_t));
907 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
908 return (retcode);
909
910 case RAIDFRAME_SET_COMPONENT_LABEL:
911 component_label = (RF_ComponentLabel_t *) data;
912
913 /* XXX check the label for valid stuff... */
914 /* Note that some things *should not* get modified --
915 the user should be re-initing the labels instead of
916 trying to patch things.
917 */
918
919 printf("Got component label:\n");
920 printf("Version: %d\n",component_label->version);
921 printf("Serial Number: %d\n",component_label->serial_number);
922 printf("Mod counter: %d\n",component_label->mod_counter);
923 printf("Row: %d\n", component_label->row);
924 printf("Column: %d\n", component_label->column);
925 printf("Num Rows: %d\n", component_label->num_rows);
926 printf("Num Columns: %d\n", component_label->num_columns);
927 printf("Clean: %d\n", component_label->clean);
928 printf("Status: %d\n", component_label->status);
929
930 row = component_label->row;
931 column = component_label->column;
932
933 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
934 (column < 0) || (column > raidPtrs[unit]->numCol)) {
935 return(EINVAL);
936 }
937
938 /* XXX this isn't allowed to do anything for now :-) */
939 #if 0
940 raidwrite_component_label(
941 raidPtrs[unit]->Disks[row][column].dev,
942 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
943 component_label );
944 #endif
945 return (0);
946
947 case RAIDFRAME_INIT_LABELS:
948 component_label = (RF_ComponentLabel_t *) data;
949 /*
950 we only want the serial number from
951 the above. We get all the rest of the information
952 from the config that was used to create this RAID
953 set.
954 */
955
956 raidPtrs[unit]->serial_number = component_label->serial_number;
957 /* current version number */
958 ci_label.version = RF_COMPONENT_LABEL_VERSION;
959 ci_label.serial_number = component_label->serial_number;
960 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
961 ci_label.num_rows = raidPtrs[unit]->numRow;
962 ci_label.num_columns = raidPtrs[unit]->numCol;
963 ci_label.clean = RF_RAID_DIRTY; /* not clean */
964 ci_label.status = rf_ds_optimal; /* "It's good!" */
965
966 for(row=0;row<raidPtrs[unit]->numRow;row++) {
967 ci_label.row = row;
968 for(column=0;column<raidPtrs[unit]->numCol;column++) {
969 ci_label.column = column;
970 raidwrite_component_label(
971 raidPtrs[unit]->Disks[row][column].dev,
972 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
973 &ci_label );
974 }
975 }
976
977 return (retcode);
978
979 /* initialize all parity */
980 case RAIDFRAME_REWRITEPARITY:
981
982 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
983 /* Parity for RAID 0 is trivially correct */
984 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
985 return(0);
986 }
987
988 /* borrow the thread of the requesting process */
989 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
990 retcode = rf_RewriteParity(raidPtrs[unit]);
991 /* return I/O Error if the parity rewrite fails */
992
993 if (retcode) {
994 retcode = EIO;
995 } else {
996 /* set the clean bit! If we shutdown correctly,
997 the clean bit on each component label will get
998 set */
999 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
1000 }
1001 return (retcode);
1002
1003
1004 case RAIDFRAME_ADD_HOT_SPARE:
1005 sparePtr = (RF_SingleComponent_t *) data;
1006 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1007 printf("Adding spare\n");
1008 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1009 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
1010 return(retcode);
1011
1012 case RAIDFRAME_REMOVE_HOT_SPARE:
1013 return(retcode);
1014
1015 case RAIDFRAME_REBUILD_IN_PLACE:
1016 componentPtr = (RF_SingleComponent_t *) data;
1017 memcpy( &component, componentPtr,
1018 sizeof(RF_SingleComponent_t));
1019 row = component.row;
1020 column = component.column;
1021 printf("Rebuild: %d %d\n",row, column);
1022 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
1023 (column < 0) || (column > raidPtrs[unit]->numCol)) {
1024 return(EINVAL);
1025 }
1026 printf("Attempting a rebuild in place\n");
1027 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1028 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1029 return(retcode);
1030
1031 /* issue a test-unit-ready through raidframe to the indicated
1032 * device */
1033 #if 0 /* XXX not supported yet (ever?) */
1034 case RAIDFRAME_TUR:
1035 /* debug only */
1036 retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
1037 return (retcode);
1038 #endif
1039 case RAIDFRAME_GET_INFO:
1040 {
1041 RF_Raid_t *raid = raidPtrs[unit];
1042 RF_DeviceConfig_t *cfg, **ucfgp;
1043 int i, j, d;
1044
1045 if (!raid->valid)
1046 return (ENODEV);
1047 ucfgp = (RF_DeviceConfig_t **) data;
1048 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1049 (RF_DeviceConfig_t *));
1050 if (cfg == NULL)
1051 return (ENOMEM);
1052 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1053 cfg->rows = raid->numRow;
1054 cfg->cols = raid->numCol;
1055 cfg->ndevs = raid->numRow * raid->numCol;
1056 if (cfg->ndevs >= RF_MAX_DISKS) {
1057 cfg->ndevs = 0;
1058 return (ENOMEM);
1059 }
1060 cfg->nspares = raid->numSpare;
1061 if (cfg->nspares >= RF_MAX_DISKS) {
1062 cfg->nspares = 0;
1063 return (ENOMEM);
1064 }
1065 cfg->maxqdepth = raid->maxQueueDepth;
1066 d = 0;
1067 for (i = 0; i < cfg->rows; i++) {
1068 for (j = 0; j < cfg->cols; j++) {
1069 cfg->devs[d] = raid->Disks[i][j];
1070 d++;
1071 }
1072 }
1073 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1074 cfg->spares[i] = raid->Disks[0][j];
1075 }
1076 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1077 sizeof(RF_DeviceConfig_t));
1078 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1079
1080 return (retcode);
1081 }
1082 break;
1083
1084 case RAIDFRAME_RESET_ACCTOTALS:
1085 {
1086 RF_Raid_t *raid = raidPtrs[unit];
1087
1088 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1089 return (0);
1090 }
1091 break;
1092
1093 case RAIDFRAME_GET_ACCTOTALS:
1094 {
1095 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1096 RF_Raid_t *raid = raidPtrs[unit];
1097
1098 *totals = raid->acc_totals;
1099 return (0);
1100 }
1101 break;
1102
1103 case RAIDFRAME_KEEP_ACCTOTALS:
1104 {
1105 RF_Raid_t *raid = raidPtrs[unit];
1106 int *keep = (int *) data;
1107
1108 raid->keep_acc_totals = *keep;
1109 return (0);
1110 }
1111 break;
1112
1113 case RAIDFRAME_GET_SIZE:
1114 *(int *) data = raidPtrs[unit]->totalSectors;
1115 return (0);
1116
1117 #define RAIDFRAME_RECON 1
1118 /* XXX The above should probably be set somewhere else!! GO */
1119 #if RAIDFRAME_RECON > 0
1120
1121 /* fail a disk & optionally start reconstruction */
1122 case RAIDFRAME_FAIL_DISK:
1123 rr = (struct rf_recon_req *) data;
1124
1125 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1126 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1127 return (EINVAL);
1128
1129 printf("raid%d: Failing the disk: row: %d col: %d\n",
1130 unit, rr->row, rr->col);
1131
1132 /* make a copy of the recon request so that we don't rely on
1133 * the user's buffer */
1134 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1135 bcopy(rr, rrcopy, sizeof(*rr));
1136 rrcopy->raidPtr = (void *) raidPtrs[unit];
1137
1138 LOCK_RECON_Q_MUTEX();
1139 rrcopy->next = recon_queue;
1140 recon_queue = rrcopy;
1141 wakeup(&recon_queue);
1142 UNLOCK_RECON_Q_MUTEX();
1143
1144 return (0);
1145
1146 /* invoke a copyback operation after recon on whatever disk
1147 * needs it, if any */
1148 case RAIDFRAME_COPYBACK:
1149 /* borrow the current thread to get this done */
1150 raidPtrs[unit]->proc = p; /* ICK.. but needed :-p GO */
1151 rf_CopybackReconstructedData(raidPtrs[unit]);
1152 return (0);
1153
1154 /* return the percentage completion of reconstruction */
1155 case RAIDFRAME_CHECKRECON:
1156 row = *(int *) data;
1157 if (row < 0 || row >= raidPtrs[unit]->numRow)
1158 return (EINVAL);
1159 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1160 *(int *) data = 100;
1161 else
1162 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1163 return (0);
1164
1165 /* the sparetable daemon calls this to wait for the kernel to
1166 * need a spare table. this ioctl does not return until a
1167 * spare table is needed. XXX -- calling mpsleep here in the
1168 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1169 * -- I should either compute the spare table in the kernel,
1170 * or have a different -- XXX XXX -- interface (a different
1171 * character device) for delivering the table -- XXX */
1172 #if 0
1173 case RAIDFRAME_SPARET_WAIT:
1174 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1175 while (!rf_sparet_wait_queue)
1176 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1177 waitreq = rf_sparet_wait_queue;
1178 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1179 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1180
1181 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1182
1183 RF_Free(waitreq, sizeof(*waitreq));
1184 return (0);
1185
1186
	/* wakes up a process waiting on SPARET_WAIT and puts an error
	 * code in it that will cause the daemon to exit */
1189 case RAIDFRAME_ABORT_SPARET_WAIT:
1190 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1191 waitreq->fcol = -1;
1192 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1193 waitreq->next = rf_sparet_wait_queue;
1194 rf_sparet_wait_queue = waitreq;
1195 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1196 wakeup(&rf_sparet_wait_queue);
1197 return (0);
1198
1199 /* used by the spare table daemon to deliver a spare table
1200 * into the kernel */
1201 case RAIDFRAME_SEND_SPARET:
1202
1203 /* install the spare table */
1204 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1205
1206 /* respond to the requestor. the return status of the spare
1207 * table installation is passed in the "fcol" field */
1208 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1209 waitreq->fcol = retcode;
1210 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1211 waitreq->next = rf_sparet_resp_queue;
1212 rf_sparet_resp_queue = waitreq;
1213 wakeup(&rf_sparet_resp_queue);
1214 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1215
1216 return (retcode);
1217 #endif
1218
1219
1220 #endif /* RAIDFRAME_RECON > 0 */
1221
1222 default:
1223 break; /* fall through to the os-specific code below */
1224
1225 }
1226
1227 if (!raidPtrs[unit]->valid)
1228 return (EINVAL);
1229
1230 /*
1231 * Add support for "regular" device ioctls here.
1232 */
1233
1234 switch (cmd) {
1235 case DIOCGDINFO:
1236 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1237 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1238 break;
1239
1240 case DIOCGPART:
1241 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1242 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1243 ((struct partinfo *) data)->part =
1244 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1245 break;
1246
1247 case DIOCWDINFO:
1248 db1_printf(("DIOCWDINFO\n"));
1249 case DIOCSDINFO:
1250 db1_printf(("DIOCSDINFO\n"));
1251 if ((error = raidlock(rs)) != 0)
1252 return (error);
1253
1254 rs->sc_flags |= RAIDF_LABELLING;
1255
1256 error = setdisklabel(rs->sc_dkdev.dk_label,
1257 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1258 if (error == 0) {
1259 if (cmd == DIOCWDINFO)
1260 error = writedisklabel(RAIDLABELDEV(dev),
1261 raidstrategy, rs->sc_dkdev.dk_label,
1262 rs->sc_dkdev.dk_cpulabel);
1263 }
1264 rs->sc_flags &= ~RAIDF_LABELLING;
1265
1266 raidunlock(rs);
1267
1268 if (error)
1269 return (error);
1270 break;
1271
1272 case DIOCWLABEL:
1273 db1_printf(("DIOCWLABEL\n"));
1274 if (*(int *) data != 0)
1275 rs->sc_flags |= RAIDF_WLABEL;
1276 else
1277 rs->sc_flags &= ~RAIDF_WLABEL;
1278 break;
1279
1280 case DIOCGDEFLABEL:
1281 db1_printf(("DIOCGDEFLABEL\n"));
1282 raidgetdefaultlabel(raidPtrs[unit], rs,
1283 (struct disklabel *) data);
1284 break;
1285
1286 default:
1287 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1288 }
1289 return (retcode);
1290
1291 }
1292
1293
1294 /* raidinit -- complete the rest of the initialization for the
1295 RAIDframe device. */
1296
1297
1298 static int
1299 raidinit(dev, raidPtr, unit)
1300 dev_t dev;
1301 RF_Raid_t *raidPtr;
1302 int unit;
1303 {
1304 int retcode;
1305 /* int ix; */
1306 /* struct raidbuf *raidbp; */
1307 struct raid_softc *rs;
1308
1309 retcode = 0;
1310
1311 rs = &raid_softc[unit];
1312 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1313 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1314
1315
1316 /* XXX should check return code first... */
1317 rs->sc_flags |= RAIDF_INITED;
1318
1319 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1320
1321 rs->sc_dkdev.dk_name = rs->sc_xname;
1322
1323 /* disk_attach actually creates space for the CPU disklabel, among
1324 * other things, so it's critical to call this *BEFORE* we try putzing
1325 * with disklabels. */
1326
1327 disk_attach(&rs->sc_dkdev);
1328
1329 /* XXX There may be a weird interaction here between this, and
1330 * protectedSectors, as used in RAIDframe. */
1331
1332 rs->sc_size = raidPtr->totalSectors;
1333 rs->sc_dev = dev;
1334
1335 return (retcode);
1336 }
1337
1338 /*
1339 * This kernel thread never exits. It is created once, and persists
1340 * until the system reboots.
1341 */
1342
void
rf_ReconKernelThread()
{
	struct rf_recon_req *req;
	int s;

	/* XXX not sure what spl() level we should be at here... probably
	 * splbio() */
	/* NOTE(review): s is never restored with splx() anywhere in this
	 * function -- the thread runs at splbio() forever.  Confirm that
	 * is intended. */
	s = splbio();

	while (1) {
		/* grab the next reconstruction request from the queue;
		 * requests are queued by the RAIDFRAME_FAIL_DISK ioctl */
		LOCK_RECON_Q_MUTEX();
		while (!recon_queue) {
			/* queue is empty: drop the mutex and sleep until
			 * the ioctl path queues a request and wakes us */
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO,
			       "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
		 * If flags specifies that we should start recon, this call
		 * will not return until reconstruction completes, fails,
		 * or is aborted.
		 */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

		/* the request was malloc'd by the ioctl handler; we free it */
		RF_Free(req, sizeof(*req));
	}
}
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	/* hand our request to the sparetable daemon and wake it up */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): the comment above refers to the old mpsleep() call,
	 * now #if 0'd below.  tsleep() does NOT drop rf_sparet_wait_mutex,
	 * so we sleep while holding it -- confirm RF_LOCK_MUTEX is
	 * effectively harmless to hold across a sleep in this build. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		       "raidframe getsparetable", 0);
#if 0
		mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
		    (void *) simple_lock_addr(rf_sparet_wait_mutex),
		    MS_LOCK_SIMPLE);
#endif
	}
	/* pop the daemon's response off the response queue */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* the daemon reports the installation status in fcol */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1414 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1415 * bp & passes it down.
1416 * any calls originating in the kernel must use non-blocking I/O
1417 * do some extra sanity checking to return "appropriate" error values for
1418 * certain conditions (to make some standard utilities work)
1419 */
int
rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
	RF_Raid_t *raidPtr;
	struct buf *bp;
	RF_RaidAccessFlags_t flags;
	void (*cbFunc) (struct buf *);
	void *cbArg;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;

	/* XXX The dev_t used here should be for /dev/[r]raid* !!! */

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Ok, for the bp we have here, bp->b_blkno is relative to the
	 * partition.. Need to make it absolute to the underlying device.. */

	blocknum = bp->b_blkno;
	if (DISKPART(bp->b_dev) != RAW_PART) {
		/* add the partition's offset within the unit */
		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
		blocknum += pp->p_offset;
		db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
			pp->p_offset));
	} else {
		db1_printf(("Is raw..\n"));
	}
	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to... but DO NOT
	 * TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/* range-check the transfer against the array size; the extra
	 * "sum < ..." comparisons catch arithmetic wrap-around */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			(int) raid_addr, (int) sum, (int) num_blocks,
			(int) pb, (int) bp->b_resid));
	}
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		/* out of range: fail the buffer with ENOSPC */
		bp->b_error = ENOSPC;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* reject transfers that are not a multiple of the sector size */
	if (bp->b_bcount & raidPtr->sectorMask) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	db1_printf(("Calling DoAccess..\n"));


	/* Put a throttle on the number of requests we handle simultaneously */

	RF_LOCK_MUTEX(raidPtr->mutex);

	/* wait for an opening; presumably the I/O completion path bumps
	 * raidPtr->openings and issues the wakeup -- not visible here */
	while(raidPtr->openings <= 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		(void)tsleep(&raidPtr->openings, PRIBIO, "rfdwait", 0);
		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	raidPtr->openings--;

	RF_UNLOCK_MUTEX(raidPtr->mutex);

	/*
	 * Everything is async.
	 */
	do_async = 1;

	/* don't ever condition on bp->b_flags & B_WRITE. always condition on
	 * B_READ instead */
	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
	    do_async, raid_addr, num_blocks,
	    bp->b_un.b_addr,
	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
	    NULL, cbFunc, cbArg);
#if 0
	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
		bp->b_data, (int) bp->b_resid));
#endif

	return (retcode);
}
1526 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1527
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* get a raidbuf from the per-unit pool to drive the component I/O */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* complete the NOP immediately via the normal I/O-done path */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		/* start timing the physical I/O for the access trace */
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* set up the component buf; KernelWakeupFunc will be called
		 * on completion */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must bump the vnode's output counter */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
1640 /* this is the callback function associated with a I/O invoked from
1641 kernel code.
1642 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	/* the original buffer this component I/O was issued on behalf of */
	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate any error on the component I/O to the original buffer */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		/* b_error may be 0 even with B_ERROR set; substitute EIO */
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* account the physical I/O time against the access trace record */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* return the raidbuf to the per-unit pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* notify the disk queue and RAIDframe proper that this I/O is done */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
1741
1742
1743
1744 /*
1745 * initialize a buf structure for doing an I/O in the kernel.
1746 */
1747 static void
1748 InitBP(
1749 struct buf * bp,
1750 struct vnode * b_vp,
1751 unsigned rw_flag,
1752 dev_t dev,
1753 RF_SectorNum_t startSect,
1754 RF_SectorCount_t numSect,
1755 caddr_t buf,
1756 void (*cbFunc) (struct buf *),
1757 void *cbArg,
1758 int logBytesPerSector,
1759 struct proc * b_proc)
1760 {
1761 /* bp->b_flags = B_PHYS | rw_flag; */
1762 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1763 bp->b_bcount = numSect << logBytesPerSector;
1764 bp->b_bufsize = bp->b_bcount;
1765 bp->b_error = 0;
1766 bp->b_dev = dev;
1767 db1_printf(("bp->b_dev is %d\n", dev));
1768 bp->b_un.b_addr = buf;
1769 #if 0
1770 db1_printf(("bp->b_data=0x%x\n", bp->b_data));
1771 #endif
1772
1773 bp->b_blkno = startSect;
1774 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1775 db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
1776 if (bp->b_bcount == 0) {
1777 panic("bp->b_bcount is zero in InitBP!!\n");
1778 }
1779 bp->b_proc = b_proc;
1780 bp->b_iodone = cbFunc;
1781 bp->b_vp = b_vp;
1782
1783 }
1784 /* Extras... */
1785
unsigned int
rpcc()
{
	/*
	 * Stub.  XXX no clue what this is supposed to do; the guess is
	 * that it should read the CPU cycle counter, but there's no
	 * portable way to do that here, so just report zero.
	 */
	/* db1_printf("this is supposed to do something useful too!??\n"); */
	return (0);
}
#if 0
/*
 * Dead code: an older stub version of rf_GetSpareTableFromDaemon().
 * The live implementation appears earlier in this file; this copy is
 * compiled out and kept only for reference.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode = 1;
	printf("This is supposed to do something useful!!\n");	/* XXX */

	return (retcode);

}
#endif
1806
1807 static void
1808 raidgetdefaultlabel(raidPtr, rs, lp)
1809 RF_Raid_t *raidPtr;
1810 struct raid_softc *rs;
1811 struct disklabel *lp;
1812 {
1813 db1_printf(("Building a default label...\n"));
1814 bzero(lp, sizeof(*lp));
1815
1816 /* fabricate a label... */
1817 lp->d_secperunit = raidPtr->totalSectors;
1818 lp->d_secsize = raidPtr->bytesPerSector;
1819 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1820 lp->d_ntracks = 1;
1821 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1822 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1823
1824 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1825 lp->d_type = DTYPE_RAID;
1826 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1827 lp->d_rpm = 3600;
1828 lp->d_interleave = 1;
1829 lp->d_flags = 0;
1830
1831 lp->d_partitions[RAW_PART].p_offset = 0;
1832 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1833 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1834 lp->d_npartitions = RAW_PART + 1;
1835
1836 lp->d_magic = DISKMAGIC;
1837 lp->d_magic2 = DISKMAGIC;
1838 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1839
1840 }
1841 /*
1842 * Read the disklabel from the raid device. If one is not present, fake one
1843 * up.
1844 */
static void
raidgetdisklabel(dev)
	dev_t dev;
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a sane default label before reading from the device */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* no label found on the device -- fabricate one */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though
		 * exactly the same components are used, and the old
		 * disklabel may be used if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
1898 /*
1899 * Take care of things one might want to take care of in the event
1900 * that a disklabel isn't present.
1901 */
1902 static void
1903 raidmakedisklabel(rs)
1904 struct raid_softc *rs;
1905 {
1906 struct disklabel *lp = rs->sc_dkdev.dk_label;
1907 db1_printf(("Making a label..\n"));
1908
1909 /*
1910 * For historical reasons, if there's no disklabel present
1911 * the raw partition must be marked FS_BSDFFS.
1912 */
1913
1914 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1915
1916 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1917
1918 lp->d_checksum = dkcksum(lp);
1919 }
1920 /*
1921 * Lookup the provided name in the filesystem. If the file exists,
1922 * is a valid block device, and isn't being used by anyone else,
1923 * set *vpp to the file's vnode.
1924 * You'll find the original of this in ccd.c
1925 */
1926 int
1927 raidlookup(path, p, vpp)
1928 char *path;
1929 struct proc *p;
1930 struct vnode **vpp; /* result */
1931 {
1932 struct nameidata nd;
1933 struct vnode *vp;
1934 struct vattr va;
1935 int error;
1936
1937 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1938 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1939 #ifdef DEBUG
1940 printf("RAIDframe: vn_open returned %d\n", error);
1941 #endif
1942 return (error);
1943 }
1944 vp = nd.ni_vp;
1945 if (vp->v_usecount > 1) {
1946 VOP_UNLOCK(vp, 0);
1947 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1948 return (EBUSY);
1949 }
1950 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1951 VOP_UNLOCK(vp, 0);
1952 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1953 return (error);
1954 }
1955 /* XXX: eventually we should handle VREG, too. */
1956 if (va.va_type != VBLK) {
1957 VOP_UNLOCK(vp, 0);
1958 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1959 return (ENOTBLK);
1960 }
1961 VOP_UNLOCK(vp, 0);
1962 *vpp = vp;
1963 return (0);
1964 }
1965 /*
1966 * Wait interruptibly for an exclusive lock.
1967 *
1968 * XXX
1969 * Several drivers do this; it should be abstracted and made MP-safe.
1970 * (Hmm... where have we seen this warning before :-> GO )
1971 */
1972 static int
1973 raidlock(rs)
1974 struct raid_softc *rs;
1975 {
1976 int error;
1977
1978 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1979 rs->sc_flags |= RAIDF_WANTED;
1980 if ((error =
1981 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1982 return (error);
1983 }
1984 rs->sc_flags |= RAIDF_LOCKED;
1985 return (0);
1986 }
1987 /*
1988 * Unlock and wake up any waiters.
1989 */
1990 static void
1991 raidunlock(rs)
1992 struct raid_softc *rs;
1993 {
1994
1995 rs->sc_flags &= ~RAIDF_LOCKED;
1996 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
1997 rs->sc_flags &= ~RAIDF_WANTED;
1998 wakeup(rs);
1999 }
2000 }
2001
2002
2003 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2004 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2005
2006 int
2007 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2008 {
2009 RF_ComponentLabel_t component_label;
2010 raidread_component_label(dev, b_vp, &component_label);
2011 component_label.mod_counter = mod_counter;
2012 component_label.clean = RF_RAID_CLEAN;
2013 raidwrite_component_label(dev, b_vp, &component_label);
2014 return(0);
2015 }
2016
2017
2018 int
2019 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2020 {
2021 RF_ComponentLabel_t component_label;
2022 raidread_component_label(dev, b_vp, &component_label);
2023 component_label.mod_counter = mod_counter;
2024 component_label.clean = RF_RAID_DIRTY;
2025 raidwrite_component_label(dev, b_vp, &component_label);
2026 return(0);
2027 }
2028
2029 /* ARGSUSED */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_resid is conventionally a byte count, but this
	 * assigns a sector count (SIZE / DEV_BSIZE) -- confirm intent. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* issue the read synchronously through the component's block
	 * device strategy routine and wait for completion */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		/* copy the label out of the scratch buffer */
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* invalidate the scratch buffer so its contents aren't reused */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
2079 /* ARGSUSED */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	/* NOTE(review): b_resid is conventionally a byte count, but this
	 * assigns a sector count (SIZE / DEV_BSIZE) -- confirm intent. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero-fill the block, then drop the label at the front of it */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* issue the write synchronously and wait for completion */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* invalidate the scratch buffer so its contents aren't reused */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2113
2114 void
2115 rf_markalldirty( raidPtr )
2116 RF_Raid_t *raidPtr;
2117 {
2118 RF_ComponentLabel_t c_label;
2119 int r,c;
2120
2121 raidPtr->mod_counter++;
2122 for (r = 0; r < raidPtr->numRow; r++) {
2123 for (c = 0; c < raidPtr->numCol; c++) {
2124 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2125 raidread_component_label(
2126 raidPtr->Disks[r][c].dev,
2127 raidPtr->raid_cinfo[r][c].ci_vp,
2128 &c_label);
2129 if (c_label.status == rf_ds_spared) {
2130 /* XXX do something special...
2131 but whatever you do, don't
2132 try to access it!! */
2133 } else {
2134 #if 0
2135 c_label.status =
2136 raidPtr->Disks[r][c].status;
2137 raidwrite_component_label(
2138 raidPtr->Disks[r][c].dev,
2139 raidPtr->raid_cinfo[r][c].ci_vp,
2140 &c_label);
2141 #endif
2142 raidmarkdirty(
2143 raidPtr->Disks[r][c].dev,
2144 raidPtr->raid_cinfo[r][c].ci_vp,
2145 raidPtr->mod_counter);
2146 }
2147 }
2148 }
2149 }
2150 /* printf("Component labels marked dirty.\n"); */
2151 #if 0
2152 for( c = 0; c < raidPtr->numSpare ; c++) {
2153 sparecol = raidPtr->numCol + c;
2154 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2155 /*
2156
2157 XXX this is where we get fancy and map this spare
2158 into it's correct spot in the array.
2159
2160 */
2161 /*
2162
2163 we claim this disk is "optimal" if it's
2164 rf_ds_used_spare, as that means it should be
2165 directly substitutable for the disk it replaced.
2166 We note that too...
2167
2168 */
2169
2170 for(i=0;i<raidPtr->numRow;i++) {
2171 for(j=0;j<raidPtr->numCol;j++) {
2172 if ((raidPtr->Disks[i][j].spareRow ==
2173 r) &&
2174 (raidPtr->Disks[i][j].spareCol ==
2175 sparecol)) {
2176 srow = r;
2177 scol = sparecol;
2178 break;
2179 }
2180 }
2181 }
2182
2183 raidread_component_label(
2184 raidPtr->Disks[r][sparecol].dev,
2185 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2186 &c_label);
2187 /* make sure status is noted */
2188 c_label.version = RF_COMPONENT_LABEL_VERSION;
2189 c_label.mod_counter = raidPtr->mod_counter;
2190 c_label.serial_number = raidPtr->serial_number;
2191 c_label.row = srow;
2192 c_label.column = scol;
2193 c_label.num_rows = raidPtr->numRow;
2194 c_label.num_columns = raidPtr->numCol;
2195 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2196 c_label.status = rf_ds_optimal;
2197 raidwrite_component_label(
2198 raidPtr->Disks[r][sparecol].dev,
2199 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2200 &c_label);
2201 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2202 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2203 }
2204 }
2205
2206 #endif
2207 }
2208
2209
/*
 * Rewrite the component labels of all optimal components (and any
 * in-use spares) to note their current status, and -- when the parity
 * is known to be clean -- mark the labels clean as well.  The array's
 * modification counter is bumped first so all labels written here
 * carry the new value.
 *
 * NOTE(review): the return values of raidread_component_label() and
 * raidwrite_component_label() are ignored throughout; on a failed
 * read, c_label is used uninitialized -- should be checked.
 */
void
rf_update_component_labels( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	/* -1 means "no mapping found" for the spare remap below;
	   note these sentinels can survive into the written label if
	   the search loop finds no match. */
	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	/* Pass 1: refresh the label on every optimal component. */
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				/* read-modify-write: only the status
				   field is updated here. */
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status = rf_ds_optimal;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
			/* else we don't touch it.. */
#if 0
			else if (raidPtr->Disks[r][c].status !=
				 rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
#endif
		}
	}

	/* Pass 2: rewrite the label on each in-use spare so it
	   carries the identity (row/column) of the disk it replaced.
	   NOTE(review): spares are assumed to live in row 0 of the
	   Disks array -- confirm this holds for multi-row configs. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			*/

			/* Search the whole array for the component
			   this spare stands in for.
			   NOTE(review): the break only exits the
			   inner loop; harmless if at most one (i,j)
			   matches, but the outer loop keeps going. */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			/* the spare's label claims the identity of
			   the replaced disk. */
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			if (raidPtr->parity_good == RF_RAID_CLEAN) {
				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
					raidPtr->raid_cinfo[0][sparecol].ci_vp,
					       raidPtr->mod_counter);
			}
		}
	}
	/* printf("Component labels updated\n"); */
}
2326