rf_netbsdkintf.c revision 1.25 1 /* $NetBSD: rf_netbsdkintf.c,v 1.25 1999/08/13 03:27:46 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
/*
 * Debug verbosity for the dbN_printf() macros below.  In DEBUG kernels
 * dbN_printf() prints only when rf_kdebug_level is at least N;
 * db0_printf() always prints.
 */
int	rf_kdebug_level = 0;

/* Values stored in rf_kbooted. */
#define	RFK_BOOT_NONE	0
#define	RFK_BOOT_GOOD	1
#define	RFK_BOOT_BAD	2
static int rf_kbooted = RFK_BOOT_NONE;

/*
 * Debug printf wrappers.  Each takes one fully parenthesized printf()
 * argument list and must be followed by a semicolon, e.g.
 *
 *	db1_printf(("unit %d\n", unit));
 *
 * The conditional forms are wrapped in do { } while (0) so that each use
 * is exactly one statement.  A bare `if (...) printf a' would capture a
 * following `else', and a bare `{ }' followed by `;' is a syntax error
 * in front of an `else'.
 */
#ifdef DEBUG
#define	db0_printf(a)	printf a
#define	db_printf(a)	do { if (rf_kdebug_level > 0) printf a; } while (0)
#define	db1_printf(a)	do { if (rf_kdebug_level > 0) printf a; } while (0)
#define	db2_printf(a)	do { if (rf_kdebug_level > 1) printf a; } while (0)
#define	db3_printf(a)	do { if (rf_kdebug_level > 2) printf a; } while (0)
#define	db4_printf(a)	do { if (rf_kdebug_level > 3) printf a; } while (0)
#define	db5_printf(a)	do { if (rf_kdebug_level > 4) printf a; } while (0)
#else				/* DEBUG */
#define	db0_printf(a)	printf a
#define	db_printf(a)	do { } while (0)
#define	db1_printf(a)	do { } while (0)
#define	db2_printf(a)	do { } while (0)
#define	db3_printf(a)	do { } while (0)
#define	db4_printf(a)	do { } while (0)
#define	db5_printf(a)	do { } while (0)
#endif				/* DEBUG */
177
178 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
179
180 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
181
182 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
183 * spare table */
184 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
185 * installation process */
186
187 static struct rf_recon_req *recon_queue = NULL; /* used to communicate
188 * reconstruction
189 * requests */
190
191
192 decl_simple_lock_data(, recon_queue_mutex)
193 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
194 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
195
196 /* prototypes */
197 static void KernelWakeupFunc(struct buf * bp);
198 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
199 dev_t dev, RF_SectorNum_t startSect,
200 RF_SectorCount_t numSect, caddr_t buf,
201 void (*cbFunc) (struct buf *), void *cbArg,
202 int logBytesPerSector, struct proc * b_proc);
203
/*
 * Queue-debug printf wrappers: call rf_debug_printf() with the format
 * `s' and up to three arguments, padding the remaining argument slots
 * with NULL, but only when rf_queueDebug is non-zero.
 *
 * Wrapped in do { } while (0) so each use is a single statement; the
 * former bare `if (rf_queueDebug) ...' form would capture an `else'
 * that follows the macro invocation.
 */
#define	Dprintf0(s) \
	do { \
		if (rf_queueDebug) \
			rf_debug_printf(s, NULL, NULL, NULL, NULL, \
			    NULL, NULL, NULL, NULL); \
	} while (0)
#define	Dprintf1(s, a) \
	do { \
		if (rf_queueDebug) \
			rf_debug_printf(s, a, NULL, NULL, NULL, \
			    NULL, NULL, NULL, NULL); \
	} while (0)
#define	Dprintf2(s, a, b) \
	do { \
		if (rf_queueDebug) \
			rf_debug_printf(s, a, b, NULL, NULL, \
			    NULL, NULL, NULL, NULL); \
	} while (0)
#define	Dprintf3(s, a, b, c) \
	do { \
		if (rf_queueDebug) \
			rf_debug_printf(s, a, b, c, NULL, \
			    NULL, NULL, NULL, NULL); \
	} while (0)
212
213 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
214 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
215
216 void raidattach __P((int));
217 int raidsize __P((dev_t));
218
219 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
220 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
221 static int raidinit __P((dev_t, RF_Raid_t *, int));
222
223 int raidopen __P((dev_t, int, int, struct proc *));
224 int raidclose __P((dev_t, int, int, struct proc *));
225 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
226 int raidwrite __P((dev_t, struct uio *, int));
227 int raidread __P((dev_t, struct uio *, int));
228 void raidstrategy __P((struct buf *));
229 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
230
231 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
232 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
233 void rf_update_component_labels( RF_Raid_t *);
234 /*
235 * Pilfered from ccd.c
236 */
237
238 struct raidbuf {
239 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
240 struct buf *rf_obp; /* ptr. to original I/O buf */
241 int rf_flags; /* misc. flags */
242 RF_DiskQueueData_t *req;/* the request that this was part of.. */
243 };
244
245
246 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
247 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
248
249 /* XXX Not sure if the following should be replacing the raidPtrs above,
250 or if it should be used in conjunction with that... */
251
252 struct raid_softc {
253 int sc_flags; /* flags */
254 int sc_cflags; /* configuration flags */
255 size_t sc_size; /* size of the raid device */
256 dev_t sc_dev; /* our device.. */
257 char sc_xname[20]; /* XXX external name */
258 struct disk sc_dkdev; /* generic disk device info */
259 struct pool sc_cbufpool; /* component buffer pool */
260 };
/*
 * Bits stored in raid_softc.sc_flags.
 */
/* unit has been initialized */
#define	RAIDF_INITED	0x01
/* label area is writable */
#define	RAIDF_WLABEL	0x02
/* unit is currently being labelled */
#define	RAIDF_LABELLING	0x04
/* someone is waiting to obtain a lock */
#define	RAIDF_WANTED	0x40
/* unit is locked */
#define	RAIDF_LOCKED	0x80

/* unit number encoded in a dev_t */
#define	raidunit(x)	DISKUNIT(x)

/* number of usable units; set by raidattach() */
static int numraid = 0;
270
271 /*
272 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
273 * Be aware that large numbers can allow the driver to consume a lot of
274 * kernel memory, especially on writes...
275 */
276
#ifndef RAIDOUTSTANDING
/* default, used only when the build has not already defined a value */
#define	RAIDOUTSTANDING	10
#endif

/* dev_t of the RAW_PART partition on the same major/unit as `dev' */
#define	RAIDLABELDEV(dev) \
	(MAKEDISKDEV(major(dev), raidunit(dev), RAW_PART))
284 /* declared here, and made public, for the benefit of KVM stuff.. */
285 struct raid_softc *raid_softc;
286
287 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
288 struct disklabel *));
289 static void raidgetdisklabel __P((dev_t));
290 static void raidmakedisklabel __P((struct raid_softc *));
291
292 static int raidlock __P((struct raid_softc *));
293 static void raidunlock __P((struct raid_softc *));
294 int raidlookup __P((char *, struct proc * p, struct vnode **));
295
296 static void rf_markalldirty __P((RF_Raid_t *));
297
298 void
299 raidattach(num)
300 int num;
301 {
302 int raidID;
303 int i, rc;
304
305 #ifdef DEBUG
306 printf("raidattach: Asked for %d units\n", num);
307 #endif
308
309 if (num <= 0) {
310 #ifdef DIAGNOSTIC
311 panic("raidattach: count <= 0");
312 #endif
313 return;
314 }
315 /* This is where all the initialization stuff gets done. */
316
317 /* Make some space for requested number of units... */
318
319 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
320 if (raidPtrs == NULL) {
321 panic("raidPtrs is NULL!!\n");
322 }
323
324 rc = rf_mutex_init(&rf_sparet_wait_mutex);
325 if (rc) {
326 RF_PANIC();
327 }
328
329 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
330 recon_queue = NULL;
331
332 for (i = 0; i < numraid; i++)
333 raidPtrs[i] = NULL;
334 rc = rf_BootRaidframe();
335 if (rc == 0)
336 printf("Kernelized RAIDframe activated\n");
337 else
338 panic("Serious error booting RAID!!\n");
339
340 rf_kbooted = RFK_BOOT_GOOD;
341
342 /* put together some datastructures like the CCD device does.. This
343 * lets us lock the device and what-not when it gets opened. */
344
345 raid_softc = (struct raid_softc *)
346 malloc(num * sizeof(struct raid_softc),
347 M_RAIDFRAME, M_NOWAIT);
348 if (raid_softc == NULL) {
349 printf("WARNING: no memory for RAIDframe driver\n");
350 return;
351 }
352 numraid = num;
353 bzero(raid_softc, num * sizeof(struct raid_softc));
354
355 for (raidID = 0; raidID < num; raidID++) {
356 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
357 (RF_Raid_t *));
358 if (raidPtrs[raidID] == NULL) {
359 printf("raidPtrs[%d] is NULL\n", raidID);
360 }
361 }
362 }
363
364
365 int
366 raidsize(dev)
367 dev_t dev;
368 {
369 struct raid_softc *rs;
370 struct disklabel *lp;
371 int part, unit, omask, size;
372
373 unit = raidunit(dev);
374 if (unit >= numraid)
375 return (-1);
376 rs = &raid_softc[unit];
377
378 if ((rs->sc_flags & RAIDF_INITED) == 0)
379 return (-1);
380
381 part = DISKPART(dev);
382 omask = rs->sc_dkdev.dk_openmask & (1 << part);
383 lp = rs->sc_dkdev.dk_label;
384
385 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
386 return (-1);
387
388 if (lp->d_partitions[part].p_fstype != FS_SWAP)
389 size = -1;
390 else
391 size = lp->d_partitions[part].p_size *
392 (lp->d_secsize / DEV_BSIZE);
393
394 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
395 return (-1);
396
397 return (size);
398
399 }
400
/*
 * raiddump:
 *	Dump entry point.  Not implemented for RAID devices: every
 *	argument is ignored and ENXIO is always returned.
 */
int
raiddump(dev, blkno, va, size)
	dev_t   dev;
	daddr_t blkno;
	caddr_t va;
	size_t  size;
{
	return (ENXIO);
}
411 /* ARGSUSED */
412 int
413 raidopen(dev, flags, fmt, p)
414 dev_t dev;
415 int flags, fmt;
416 struct proc *p;
417 {
418 int unit = raidunit(dev);
419 struct raid_softc *rs;
420 struct disklabel *lp;
421 int part, pmask;
422 int error = 0;
423
424 if (unit >= numraid)
425 return (ENXIO);
426 rs = &raid_softc[unit];
427
428 if ((error = raidlock(rs)) != 0)
429 return (error);
430 lp = rs->sc_dkdev.dk_label;
431
432 part = DISKPART(dev);
433 pmask = (1 << part);
434
435 db1_printf(("Opening raid device number: %d partition: %d\n",
436 unit, part));
437
438
439 if ((rs->sc_flags & RAIDF_INITED) &&
440 (rs->sc_dkdev.dk_openmask == 0))
441 raidgetdisklabel(dev);
442
443 /* make sure that this partition exists */
444
445 if (part != RAW_PART) {
446 db1_printf(("Not a raw partition..\n"));
447 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
448 ((part >= lp->d_npartitions) ||
449 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
450 error = ENXIO;
451 raidunlock(rs);
452 db1_printf(("Bailing out...\n"));
453 return (error);
454 }
455 }
456 /* Prevent this unit from being unconfigured while open. */
457 switch (fmt) {
458 case S_IFCHR:
459 rs->sc_dkdev.dk_copenmask |= pmask;
460 break;
461
462 case S_IFBLK:
463 rs->sc_dkdev.dk_bopenmask |= pmask;
464 break;
465 }
466
467 if ((rs->sc_dkdev.dk_openmask == 0) &&
468 ((rs->sc_flags & RAIDF_INITED) != 0)) {
469 /* First one... mark things as dirty... Note that we *MUST*
470 have done a configure before this. I DO NOT WANT TO BE
471 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
472 THAT THEY BELONG TOGETHER!!!!! */
473 /* XXX should check to see if we're only open for reading
474 here... If so, we needn't do this, but then need some
475 other way of keeping track of what's happened.. */
476
477 rf_markalldirty( raidPtrs[unit] );
478 }
479
480
481 rs->sc_dkdev.dk_openmask =
482 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
483
484 raidunlock(rs);
485
486 return (error);
487
488
489 }
490 /* ARGSUSED */
491 int
492 raidclose(dev, flags, fmt, p)
493 dev_t dev;
494 int flags, fmt;
495 struct proc *p;
496 {
497 int unit = raidunit(dev);
498 struct raid_softc *rs;
499 int error = 0;
500 int part;
501
502 if (unit >= numraid)
503 return (ENXIO);
504 rs = &raid_softc[unit];
505
506 if ((error = raidlock(rs)) != 0)
507 return (error);
508
509 part = DISKPART(dev);
510
511 /* ...that much closer to allowing unconfiguration... */
512 switch (fmt) {
513 case S_IFCHR:
514 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
515 break;
516
517 case S_IFBLK:
518 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
519 break;
520 }
521 rs->sc_dkdev.dk_openmask =
522 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
523
524 if ((rs->sc_dkdev.dk_openmask == 0) &&
525 ((rs->sc_flags & RAIDF_INITED) != 0)) {
526 /* Last one... device is not unconfigured yet.
527 Device shutdown has taken care of setting the
528 clean bits if RAIDF_INITED is not set
529 mark things as clean... */
530 rf_update_component_labels( raidPtrs[unit] );
531 }
532
533 raidunlock(rs);
534 return (0);
535
536 }
537
538 void
539 raidstrategy(bp)
540 register struct buf *bp;
541 {
542 register int s;
543
544 unsigned int raidID = raidunit(bp->b_dev);
545 RF_Raid_t *raidPtr;
546 struct raid_softc *rs = &raid_softc[raidID];
547 struct disklabel *lp;
548 int wlabel;
549
550 #if 0
551 db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
552 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
553 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
554 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
555 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
556
557 if (bp->b_flags & B_READ)
558 db1_printf(("READ\n"));
559 else
560 db1_printf(("WRITE\n"));
561 #endif
562 if (rf_kbooted != RFK_BOOT_GOOD)
563 return;
564 if (raidID >= numraid || !raidPtrs[raidID]) {
565 bp->b_error = ENODEV;
566 bp->b_flags |= B_ERROR;
567 bp->b_resid = bp->b_bcount;
568 biodone(bp);
569 return;
570 }
571 raidPtr = raidPtrs[raidID];
572 if (!raidPtr->valid) {
573 bp->b_error = ENODEV;
574 bp->b_flags |= B_ERROR;
575 bp->b_resid = bp->b_bcount;
576 biodone(bp);
577 return;
578 }
579 if (bp->b_bcount == 0) {
580 db1_printf(("b_bcount is zero..\n"));
581 biodone(bp);
582 return;
583 }
584 lp = rs->sc_dkdev.dk_label;
585
586 /*
587 * Do bounds checking and adjust transfer. If there's an
588 * error, the bounds check will flag that for us.
589 */
590
591 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
592 if (DISKPART(bp->b_dev) != RAW_PART)
593 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
594 db1_printf(("Bounds check failed!!:%d %d\n",
595 (int) bp->b_blkno, (int) wlabel));
596 biodone(bp);
597 return;
598 }
599 s = splbio(); /* XXX Needed? */
600 db1_printf(("Beginning strategy...\n"));
601
602 bp->b_resid = 0;
603 bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
604 NULL, NULL, NULL);
605 if (bp->b_error) {
606 bp->b_flags |= B_ERROR;
607 db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
608 bp->b_error));
609 }
610 splx(s);
611 #if 0
612 db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
613 bp, bp->b_data,
614 (int) bp->b_bcount, (int) bp->b_resid));
615 #endif
616 }
617 /* ARGSUSED */
618 int
619 raidread(dev, uio, flags)
620 dev_t dev;
621 struct uio *uio;
622 int flags;
623 {
624 int unit = raidunit(dev);
625 struct raid_softc *rs;
626 int part;
627
628 if (unit >= numraid)
629 return (ENXIO);
630 rs = &raid_softc[unit];
631
632 if ((rs->sc_flags & RAIDF_INITED) == 0)
633 return (ENXIO);
634 part = DISKPART(dev);
635
636 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
637
638 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
639
640 }
641 /* ARGSUSED */
642 int
643 raidwrite(dev, uio, flags)
644 dev_t dev;
645 struct uio *uio;
646 int flags;
647 {
648 int unit = raidunit(dev);
649 struct raid_softc *rs;
650
651 if (unit >= numraid)
652 return (ENXIO);
653 rs = &raid_softc[unit];
654
655 if ((rs->sc_flags & RAIDF_INITED) == 0)
656 return (ENXIO);
657 db1_printf(("raidwrite\n"));
658 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
659
660 }
661
662 int
663 raidioctl(dev, cmd, data, flag, p)
664 dev_t dev;
665 u_long cmd;
666 caddr_t data;
667 int flag;
668 struct proc *p;
669 {
670 int unit = raidunit(dev);
671 int error = 0;
672 int part, pmask;
673 struct raid_softc *rs;
674 #if 0
675 int r, c;
676 #endif
677 /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
678
679 /* struct ccdbuf *cbp; */
680 /* struct raidbuf *raidbp; */
681 RF_Config_t *k_cfg, *u_cfg;
682 u_char *specific_buf;
683 int retcode = 0;
684 int row;
685 int column;
686 int s;
687 struct rf_recon_req *rrcopy, *rr;
688 RF_ComponentLabel_t *component_label;
689 RF_ComponentLabel_t ci_label;
690 RF_ComponentLabel_t **c_label_ptr;
691 RF_SingleComponent_t *sparePtr,*componentPtr;
692 RF_SingleComponent_t hot_spare;
693 RF_SingleComponent_t component;
694
695 if (unit >= numraid)
696 return (ENXIO);
697 rs = &raid_softc[unit];
698
699 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
700 (int) DISKPART(dev), (int) unit, (int) cmd));
701
702 /* Must be open for writes for these commands... */
703 switch (cmd) {
704 case DIOCSDINFO:
705 case DIOCWDINFO:
706 case DIOCWLABEL:
707 if ((flag & FWRITE) == 0)
708 return (EBADF);
709 }
710
711 /* Must be initialized for these... */
712 switch (cmd) {
713 case DIOCGDINFO:
714 case DIOCSDINFO:
715 case DIOCWDINFO:
716 case DIOCGPART:
717 case DIOCWLABEL:
718 case DIOCGDEFLABEL:
719 case RAIDFRAME_SHUTDOWN:
720 case RAIDFRAME_REWRITEPARITY:
721 case RAIDFRAME_GET_INFO:
722 case RAIDFRAME_RESET_ACCTOTALS:
723 case RAIDFRAME_GET_ACCTOTALS:
724 case RAIDFRAME_KEEP_ACCTOTALS:
725 case RAIDFRAME_GET_SIZE:
726 case RAIDFRAME_FAIL_DISK:
727 case RAIDFRAME_COPYBACK:
728 case RAIDFRAME_CHECKRECON:
729 case RAIDFRAME_GET_COMPONENT_LABEL:
730 case RAIDFRAME_SET_COMPONENT_LABEL:
731 case RAIDFRAME_ADD_HOT_SPARE:
732 case RAIDFRAME_REMOVE_HOT_SPARE:
733 case RAIDFRAME_INIT_LABELS:
734 case RAIDFRAME_REBUILD_IN_PLACE:
735 case RAIDFRAME_CHECK_PARITY:
736 if ((rs->sc_flags & RAIDF_INITED) == 0)
737 return (ENXIO);
738 }
739
740 switch (cmd) {
741
742
743 /* configure the system */
744 case RAIDFRAME_CONFIGURE:
745
746 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
747 /* copy-in the configuration information */
748 /* data points to a pointer to the configuration structure */
749 u_cfg = *((RF_Config_t **) data);
750 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
751 if (k_cfg == NULL) {
752 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
753 return (ENOMEM);
754 }
755 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
756 sizeof(RF_Config_t));
757 if (retcode) {
758 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
759 retcode));
760 return (retcode);
761 }
762 /* allocate a buffer for the layout-specific data, and copy it
763 * in */
764 if (k_cfg->layoutSpecificSize) {
765 if (k_cfg->layoutSpecificSize > 10000) {
766 /* sanity check */
767 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
768 return (EINVAL);
769 }
770 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
771 (u_char *));
772 if (specific_buf == NULL) {
773 RF_Free(k_cfg, sizeof(RF_Config_t));
774 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
775 return (ENOMEM);
776 }
777 retcode = copyin(k_cfg->layoutSpecific,
778 (caddr_t) specific_buf,
779 k_cfg->layoutSpecificSize);
780 if (retcode) {
781 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
782 retcode));
783 return (retcode);
784 }
785 } else
786 specific_buf = NULL;
787 k_cfg->layoutSpecific = specific_buf;
788
789 /* should do some kind of sanity check on the configuration.
790 * Store the sum of all the bytes in the last byte? */
791
792 #if 0
793 db1_printf(("Considering configuring the system.:%d 0x%x\n",
794 unit, p));
795 #endif
796
797 /* We need the pointer to this a little deeper, so stash it
798 * here... */
799
800 raidPtrs[unit]->proc = p;
801
802 /* configure the system */
803
804 raidPtrs[unit]->raidid = unit;
805
806 retcode = rf_Configure(raidPtrs[unit], k_cfg);
807
808 /* allow this many simultaneous IO's to this RAID device */
809 raidPtrs[unit]->openings = RAIDOUTSTANDING;
810
811 if (retcode == 0) {
812 retcode = raidinit(dev, raidPtrs[unit], unit);
813 rf_markalldirty( raidPtrs[unit] );
814 }
815 /* free the buffers. No return code here. */
816 if (k_cfg->layoutSpecificSize) {
817 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
818 }
819 RF_Free(k_cfg, sizeof(RF_Config_t));
820
821 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
822 retcode));
823
824 return (retcode);
825
826 /* shutdown the system */
827 case RAIDFRAME_SHUTDOWN:
828
829 if ((error = raidlock(rs)) != 0)
830 return (error);
831
832 /*
833 * If somebody has a partition mounted, we shouldn't
834 * shutdown.
835 */
836
837 part = DISKPART(dev);
838 pmask = (1 << part);
839 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
840 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
841 (rs->sc_dkdev.dk_copenmask & pmask))) {
842 raidunlock(rs);
843 return (EBUSY);
844 }
845
846 if (rf_debugKernelAccess) {
847 printf("call shutdown\n");
848 }
849 raidPtrs[unit]->proc = p; /* XXX necessary evil */
850
851 retcode = rf_Shutdown(raidPtrs[unit]);
852
853 db1_printf(("Done main shutdown\n"));
854
855 pool_destroy(&rs->sc_cbufpool);
856 db1_printf(("Done freeing component buffer freelist\n"));
857
858 /* It's no longer initialized... */
859 rs->sc_flags &= ~RAIDF_INITED;
860
861 /* Detach the disk. */
862 disk_detach(&rs->sc_dkdev);
863
864 raidunlock(rs);
865
866 return (retcode);
867 case RAIDFRAME_GET_COMPONENT_LABEL:
868 c_label_ptr = (RF_ComponentLabel_t **) data;
869 /* need to read the component label for the disk indicated
870 by row,column in component_label
871 XXX need to sanity check these values!!!
872 */
873
874 /* For practice, let's get it directly fromdisk, rather
875 than from the in-core copy */
876 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
877 (RF_ComponentLabel_t *));
878 if (component_label == NULL)
879 return (ENOMEM);
880
881 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
882
883 retcode = copyin( *c_label_ptr, component_label,
884 sizeof(RF_ComponentLabel_t));
885
886 if (retcode) {
887 return(retcode);
888 }
889
890 row = component_label->row;
891 printf("Row: %d\n",row);
892 if (row > raidPtrs[unit]->numRow) {
893 row = 0; /* XXX */
894 }
895 column = component_label->column;
896 printf("Column: %d\n",column);
897 if (column > raidPtrs[unit]->numCol) {
898 column = 0; /* XXX */
899 }
900
901 raidread_component_label(
902 raidPtrs[unit]->Disks[row][column].dev,
903 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
904 component_label );
905
906 retcode = copyout((caddr_t) component_label,
907 (caddr_t) *c_label_ptr,
908 sizeof(RF_ComponentLabel_t));
909 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
910 return (retcode);
911
912 case RAIDFRAME_SET_COMPONENT_LABEL:
913 component_label = (RF_ComponentLabel_t *) data;
914
915 /* XXX check the label for valid stuff... */
916 /* Note that some things *should not* get modified --
917 the user should be re-initing the labels instead of
918 trying to patch things.
919 */
920
921 printf("Got component label:\n");
922 printf("Version: %d\n",component_label->version);
923 printf("Serial Number: %d\n",component_label->serial_number);
924 printf("Mod counter: %d\n",component_label->mod_counter);
925 printf("Row: %d\n", component_label->row);
926 printf("Column: %d\n", component_label->column);
927 printf("Num Rows: %d\n", component_label->num_rows);
928 printf("Num Columns: %d\n", component_label->num_columns);
929 printf("Clean: %d\n", component_label->clean);
930 printf("Status: %d\n", component_label->status);
931
932 row = component_label->row;
933 column = component_label->column;
934
935 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
936 (column < 0) || (column > raidPtrs[unit]->numCol)) {
937 return(EINVAL);
938 }
939
940 /* XXX this isn't allowed to do anything for now :-) */
941 #if 0
942 raidwrite_component_label(
943 raidPtrs[unit]->Disks[row][column].dev,
944 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
945 component_label );
946 #endif
947 return (0);
948
949 case RAIDFRAME_INIT_LABELS:
950 component_label = (RF_ComponentLabel_t *) data;
951 /*
952 we only want the serial number from
953 the above. We get all the rest of the information
954 from the config that was used to create this RAID
955 set.
956 */
957
958 raidPtrs[unit]->serial_number = component_label->serial_number;
959 /* current version number */
960 ci_label.version = RF_COMPONENT_LABEL_VERSION;
961 ci_label.serial_number = component_label->serial_number;
962 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
963 ci_label.num_rows = raidPtrs[unit]->numRow;
964 ci_label.num_columns = raidPtrs[unit]->numCol;
965 ci_label.clean = RF_RAID_DIRTY; /* not clean */
966 ci_label.status = rf_ds_optimal; /* "It's good!" */
967
968 for(row=0;row<raidPtrs[unit]->numRow;row++) {
969 ci_label.row = row;
970 for(column=0;column<raidPtrs[unit]->numCol;column++) {
971 ci_label.column = column;
972 raidwrite_component_label(
973 raidPtrs[unit]->Disks[row][column].dev,
974 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
975 &ci_label );
976 }
977 }
978
979 return (retcode);
980
981 /* initialize all parity */
982 case RAIDFRAME_REWRITEPARITY:
983
984 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
985 /* Parity for RAID 0 is trivially correct */
986 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
987 return(0);
988 }
989
990 /* borrow the thread of the requesting process */
991 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
992 s = splbio();
993 retcode = rf_RewriteParity(raidPtrs[unit]);
994 splx(s);
995 /* return I/O Error if the parity rewrite fails */
996
997 if (retcode) {
998 retcode = EIO;
999 } else {
1000 /* set the clean bit! If we shutdown correctly,
1001 the clean bit on each component label will get
1002 set */
1003 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
1004 }
1005 return (retcode);
1006
1007
1008 case RAIDFRAME_ADD_HOT_SPARE:
1009 sparePtr = (RF_SingleComponent_t *) data;
1010 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1011 printf("Adding spare\n");
1012 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1013 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
1014 return(retcode);
1015
1016 case RAIDFRAME_REMOVE_HOT_SPARE:
1017 return(retcode);
1018
1019 case RAIDFRAME_REBUILD_IN_PLACE:
1020
1021 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1022 /* Can't do this on a RAID 0!! */
1023 return(EINVAL);
1024 }
1025
1026 componentPtr = (RF_SingleComponent_t *) data;
1027 memcpy( &component, componentPtr,
1028 sizeof(RF_SingleComponent_t));
1029 row = component.row;
1030 column = component.column;
1031 printf("Rebuild: %d %d\n",row, column);
1032 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
1033 (column < 0) || (column > raidPtrs[unit]->numCol)) {
1034 return(EINVAL);
1035 }
1036 printf("Attempting a rebuild in place\n");
1037 s = splbio();
1038 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1039 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1040 splx(s);
1041 return(retcode);
1042
1043 /* issue a test-unit-ready through raidframe to the indicated
1044 * device */
1045 #if 0 /* XXX not supported yet (ever?) */
1046 case RAIDFRAME_TUR:
1047 /* debug only */
1048 retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
1049 return (retcode);
1050 #endif
1051 case RAIDFRAME_GET_INFO:
1052 {
1053 RF_Raid_t *raid = raidPtrs[unit];
1054 RF_DeviceConfig_t *cfg, **ucfgp;
1055 int i, j, d;
1056
1057 if (!raid->valid)
1058 return (ENODEV);
1059 ucfgp = (RF_DeviceConfig_t **) data;
1060 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1061 (RF_DeviceConfig_t *));
1062 if (cfg == NULL)
1063 return (ENOMEM);
1064 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1065 cfg->rows = raid->numRow;
1066 cfg->cols = raid->numCol;
1067 cfg->ndevs = raid->numRow * raid->numCol;
1068 if (cfg->ndevs >= RF_MAX_DISKS) {
1069 cfg->ndevs = 0;
1070 return (ENOMEM);
1071 }
1072 cfg->nspares = raid->numSpare;
1073 if (cfg->nspares >= RF_MAX_DISKS) {
1074 cfg->nspares = 0;
1075 return (ENOMEM);
1076 }
1077 cfg->maxqdepth = raid->maxQueueDepth;
1078 d = 0;
1079 for (i = 0; i < cfg->rows; i++) {
1080 for (j = 0; j < cfg->cols; j++) {
1081 cfg->devs[d] = raid->Disks[i][j];
1082 d++;
1083 }
1084 }
1085 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1086 cfg->spares[i] = raid->Disks[0][j];
1087 }
1088 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1089 sizeof(RF_DeviceConfig_t));
1090 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1091
1092 return (retcode);
1093 }
1094 break;
1095 case RAIDFRAME_CHECK_PARITY:
1096 *(int *) data = raidPtrs[unit]->parity_good;
1097 return (0);
1098 case RAIDFRAME_RESET_ACCTOTALS:
1099 {
1100 RF_Raid_t *raid = raidPtrs[unit];
1101
1102 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1103 return (0);
1104 }
1105 break;
1106
1107 case RAIDFRAME_GET_ACCTOTALS:
1108 {
1109 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1110 RF_Raid_t *raid = raidPtrs[unit];
1111
1112 *totals = raid->acc_totals;
1113 return (0);
1114 }
1115 break;
1116
1117 case RAIDFRAME_KEEP_ACCTOTALS:
1118 {
1119 RF_Raid_t *raid = raidPtrs[unit];
1120 int *keep = (int *) data;
1121
1122 raid->keep_acc_totals = *keep;
1123 return (0);
1124 }
1125 break;
1126
1127 case RAIDFRAME_GET_SIZE:
1128 *(int *) data = raidPtrs[unit]->totalSectors;
1129 return (0);
1130
1131 #define RAIDFRAME_RECON 1
1132 /* XXX The above should probably be set somewhere else!! GO */
1133 #if RAIDFRAME_RECON > 0
1134
1135 /* fail a disk & optionally start reconstruction */
1136 case RAIDFRAME_FAIL_DISK:
1137
1138 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1139 /* Can't do this on a RAID 0!! */
1140 return(EINVAL);
1141 }
1142
1143 rr = (struct rf_recon_req *) data;
1144
1145 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1146 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1147 return (EINVAL);
1148
1149 printf("raid%d: Failing the disk: row: %d col: %d\n",
1150 unit, rr->row, rr->col);
1151
1152 /* make a copy of the recon request so that we don't rely on
1153 * the user's buffer */
1154 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1155 bcopy(rr, rrcopy, sizeof(*rr));
1156 rrcopy->raidPtr = (void *) raidPtrs[unit];
1157
1158 LOCK_RECON_Q_MUTEX();
1159 rrcopy->next = recon_queue;
1160 recon_queue = rrcopy;
1161 wakeup(&recon_queue);
1162 UNLOCK_RECON_Q_MUTEX();
1163
1164 return (0);
1165
1166 /* invoke a copyback operation after recon on whatever disk
1167 * needs it, if any */
1168 case RAIDFRAME_COPYBACK:
1169
1170 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1171 /* This makes no sense on a RAID 0!! */
1172 return(EINVAL);
1173 }
1174
1175 /* borrow the current thread to get this done */
1176 raidPtrs[unit]->proc = p; /* ICK.. but needed :-p GO */
1177 s = splbio();
1178 rf_CopybackReconstructedData(raidPtrs[unit]);
1179 splx(s);
1180 return (0);
1181
1182 /* return the percentage completion of reconstruction */
1183 case RAIDFRAME_CHECKRECON:
1184 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1185 /* This makes no sense on a RAID 0 */
1186 return(EINVAL);
1187 }
1188
1189 row = *(int *) data;
1190 if (row < 0 || row >= raidPtrs[unit]->numRow)
1191 return (EINVAL);
1192 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1193 *(int *) data = 100;
1194 else
1195 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1196 return (0);
1197
1198 /* the sparetable daemon calls this to wait for the kernel to
1199 * need a spare table. this ioctl does not return until a
1200 * spare table is needed. XXX -- calling mpsleep here in the
1201 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1202 * -- I should either compute the spare table in the kernel,
1203 * or have a different -- XXX XXX -- interface (a different
1204 * character device) for delivering the table -- XXX */
1205 #if 0
1206 case RAIDFRAME_SPARET_WAIT:
1207 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1208 while (!rf_sparet_wait_queue)
1209 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1210 waitreq = rf_sparet_wait_queue;
1211 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1212 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1213
1214 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1215
1216 RF_Free(waitreq, sizeof(*waitreq));
1217 return (0);
1218
1219
1220 /* wakes up a process waiting on SPARET_WAIT and puts an error
1221 * code in it that will cause the dameon to exit */
1222 case RAIDFRAME_ABORT_SPARET_WAIT:
1223 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1224 waitreq->fcol = -1;
1225 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1226 waitreq->next = rf_sparet_wait_queue;
1227 rf_sparet_wait_queue = waitreq;
1228 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1229 wakeup(&rf_sparet_wait_queue);
1230 return (0);
1231
1232 /* used by the spare table daemon to deliver a spare table
1233 * into the kernel */
1234 case RAIDFRAME_SEND_SPARET:
1235
1236 /* install the spare table */
1237 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1238
1239 /* respond to the requestor. the return status of the spare
1240 * table installation is passed in the "fcol" field */
1241 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1242 waitreq->fcol = retcode;
1243 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1244 waitreq->next = rf_sparet_resp_queue;
1245 rf_sparet_resp_queue = waitreq;
1246 wakeup(&rf_sparet_resp_queue);
1247 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1248
1249 return (retcode);
1250 #endif
1251
1252
1253 #endif /* RAIDFRAME_RECON > 0 */
1254
1255 default:
1256 break; /* fall through to the os-specific code below */
1257
1258 }
1259
1260 if (!raidPtrs[unit]->valid)
1261 return (EINVAL);
1262
1263 /*
1264 * Add support for "regular" device ioctls here.
1265 */
1266
1267 switch (cmd) {
1268 case DIOCGDINFO:
1269 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1270 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1271 break;
1272
1273 case DIOCGPART:
1274 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1275 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1276 ((struct partinfo *) data)->part =
1277 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1278 break;
1279
1280 case DIOCWDINFO:
1281 db1_printf(("DIOCWDINFO\n"));
1282 case DIOCSDINFO:
1283 db1_printf(("DIOCSDINFO\n"));
1284 if ((error = raidlock(rs)) != 0)
1285 return (error);
1286
1287 rs->sc_flags |= RAIDF_LABELLING;
1288
1289 error = setdisklabel(rs->sc_dkdev.dk_label,
1290 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1291 if (error == 0) {
1292 if (cmd == DIOCWDINFO)
1293 error = writedisklabel(RAIDLABELDEV(dev),
1294 raidstrategy, rs->sc_dkdev.dk_label,
1295 rs->sc_dkdev.dk_cpulabel);
1296 }
1297 rs->sc_flags &= ~RAIDF_LABELLING;
1298
1299 raidunlock(rs);
1300
1301 if (error)
1302 return (error);
1303 break;
1304
1305 case DIOCWLABEL:
1306 db1_printf(("DIOCWLABEL\n"));
1307 if (*(int *) data != 0)
1308 rs->sc_flags |= RAIDF_WLABEL;
1309 else
1310 rs->sc_flags &= ~RAIDF_WLABEL;
1311 break;
1312
1313 case DIOCGDEFLABEL:
1314 db1_printf(("DIOCGDEFLABEL\n"));
1315 raidgetdefaultlabel(raidPtrs[unit], rs,
1316 (struct disklabel *) data);
1317 break;
1318
1319 default:
1320 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1321 }
1322 return (retcode);
1323
1324 }
1325
1326
1327 /* raidinit -- complete the rest of the initialization for the
1328 RAIDframe device. */
1329
1330
1331 static int
1332 raidinit(dev, raidPtr, unit)
1333 dev_t dev;
1334 RF_Raid_t *raidPtr;
1335 int unit;
1336 {
1337 int retcode;
1338 /* int ix; */
1339 /* struct raidbuf *raidbp; */
1340 struct raid_softc *rs;
1341
1342 retcode = 0;
1343
1344 rs = &raid_softc[unit];
1345 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1346 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1347
1348
1349 /* XXX should check return code first... */
1350 rs->sc_flags |= RAIDF_INITED;
1351
1352 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1353
1354 rs->sc_dkdev.dk_name = rs->sc_xname;
1355
1356 /* disk_attach actually creates space for the CPU disklabel, among
1357 * other things, so it's critical to call this *BEFORE* we try putzing
1358 * with disklabels. */
1359
1360 disk_attach(&rs->sc_dkdev);
1361
1362 /* XXX There may be a weird interaction here between this, and
1363 * protectedSectors, as used in RAIDframe. */
1364
1365 rs->sc_size = raidPtr->totalSectors;
1366 rs->sc_dev = dev;
1367
1368 return (retcode);
1369 }
1370
1371 /*
1372 * This kernel thread never exits. It is created once, and persists
1373 * until the system reboots.
1374 */
1375
1376 void
1377 rf_ReconKernelThread()
1378 {
1379 struct rf_recon_req *req;
1380 int s;
1381
1382 /* XXX not sure what spl() level we should be at here... probably
1383 * splbio() */
1384 s = splbio();
1385
1386 while (1) {
1387 /* grab the next reconstruction request from the queue */
1388 LOCK_RECON_Q_MUTEX();
1389 while (!recon_queue) {
1390 UNLOCK_RECON_Q_MUTEX();
1391 tsleep(&recon_queue, PRIBIO,
1392 "raidframe recon", 0);
1393 LOCK_RECON_Q_MUTEX();
1394 }
1395 req = recon_queue;
1396 recon_queue = recon_queue->next;
1397 UNLOCK_RECON_Q_MUTEX();
1398
1399 /*
1400 * If flags specifies that we should start recon, this call
1401 * will not return until reconstruction completes, fails,
1402 * or is aborted.
1403 */
1404 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
1405 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
1406
1407 RF_Free(req, sizeof(*req));
1408 }
1409 }
1410 /* wake up the daemon & tell it to get us a spare table
1411 * XXX
1412 * the entries in the queues should be tagged with the raidPtr
1413 * so that in the extremely rare case that two recons happen at once,
1414 * we know for which device we're requesting a spare table
1415 * XXX
1416 */
1417 int
1418 rf_GetSpareTableFromDaemon(req)
1419 RF_SparetWait_t *req;
1420 {
1421 int retcode;
1422
1423 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1424 req->next = rf_sparet_wait_queue;
1425 rf_sparet_wait_queue = req;
1426 wakeup(&rf_sparet_wait_queue);
1427
1428 /* mpsleep unlocks the mutex */
1429 while (!rf_sparet_resp_queue) {
1430 tsleep(&rf_sparet_resp_queue, PRIBIO,
1431 "raidframe getsparetable", 0);
1432 #if 0
1433 mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
1434 (void *) simple_lock_addr(rf_sparet_wait_mutex),
1435 MS_LOCK_SIMPLE);
1436 #endif
1437 }
1438 req = rf_sparet_resp_queue;
1439 rf_sparet_resp_queue = req->next;
1440 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1441
1442 retcode = req->fcol;
1443 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1444 * alloc'd */
1445 return (retcode);
1446 }
1447 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1448 * bp & passes it down.
1449 * any calls originating in the kernel must use non-blocking I/O
1450 * do some extra sanity checking to return "appropriate" error values for
1451 * certain conditions (to make some standard utilities work)
1452 */
1453 int
1454 rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
1455 RF_Raid_t *raidPtr;
1456 struct buf *bp;
1457 RF_RaidAccessFlags_t flags;
1458 void (*cbFunc) (struct buf *);
1459 void *cbArg;
1460 {
1461 RF_SectorCount_t num_blocks, pb, sum;
1462 RF_RaidAddr_t raid_addr;
1463 int retcode;
1464 struct partition *pp;
1465 daddr_t blocknum;
1466 int unit;
1467 struct raid_softc *rs;
1468 int do_async;
1469
1470 /* XXX The dev_t used here should be for /dev/[r]raid* !!! */
1471
1472 unit = raidPtr->raidid;
1473 rs = &raid_softc[unit];
1474
1475 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1476 * partition.. Need to make it absolute to the underlying device.. */
1477
1478 blocknum = bp->b_blkno;
1479 if (DISKPART(bp->b_dev) != RAW_PART) {
1480 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1481 blocknum += pp->p_offset;
1482 db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
1483 pp->p_offset));
1484 } else {
1485 db1_printf(("Is raw..\n"));
1486 }
1487 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));
1488
1489 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1490 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1491
1492 /* *THIS* is where we adjust what block we're going to... but DO NOT
1493 * TOUCH bp->b_blkno!!! */
1494 raid_addr = blocknum;
1495
1496 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1497 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1498 sum = raid_addr + num_blocks + pb;
1499 if (1 || rf_debugKernelAccess) {
1500 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1501 (int) raid_addr, (int) sum, (int) num_blocks,
1502 (int) pb, (int) bp->b_resid));
1503 }
1504 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1505 || (sum < num_blocks) || (sum < pb)) {
1506 bp->b_error = ENOSPC;
1507 bp->b_flags |= B_ERROR;
1508 bp->b_resid = bp->b_bcount;
1509 biodone(bp);
1510 return (bp->b_error);
1511 }
1512 /*
1513 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1514 */
1515
1516 if (bp->b_bcount & raidPtr->sectorMask) {
1517 bp->b_error = EINVAL;
1518 bp->b_flags |= B_ERROR;
1519 bp->b_resid = bp->b_bcount;
1520 biodone(bp);
1521 return (bp->b_error);
1522 }
1523 db1_printf(("Calling DoAccess..\n"));
1524
1525
1526 /* Put a throttle on the number of requests we handle simultanously */
1527
1528 RF_LOCK_MUTEX(raidPtr->mutex);
1529
1530 while(raidPtr->openings <= 0) {
1531 RF_UNLOCK_MUTEX(raidPtr->mutex);
1532 (void)tsleep(&raidPtr->openings, PRIBIO, "rfdwait", 0);
1533 RF_LOCK_MUTEX(raidPtr->mutex);
1534 }
1535 raidPtr->openings--;
1536
1537 RF_UNLOCK_MUTEX(raidPtr->mutex);
1538
1539 /*
1540 * Everything is async.
1541 */
1542 do_async = 1;
1543
1544 /* don't ever condition on bp->b_flags & B_WRITE. always condition on
1545 * B_READ instead */
1546 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1547 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1548 do_async, raid_addr, num_blocks,
1549 bp->b_un.b_addr,
1550 bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
1551 NULL, cbFunc, cbArg);
1552 #if 0
1553 db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
1554 bp->b_data, (int) bp->b_resid));
1555 #endif
1556
1557 return (retcode);
1558 }
1559 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1560
1561 int
1562 rf_DispatchKernelIO(queue, req)
1563 RF_DiskQueue_t *queue;
1564 RF_DiskQueueData_t *req;
1565 {
1566 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1567 struct buf *bp;
1568 struct raidbuf *raidbp = NULL;
1569 struct raid_softc *rs;
1570 int unit;
1571
1572 /* XXX along with the vnode, we also need the softc associated with
1573 * this device.. */
1574
1575 req->queue = queue;
1576
1577 unit = queue->raidPtr->raidid;
1578
1579 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1580
1581 if (unit >= numraid) {
1582 printf("Invalid unit number: %d %d\n", unit, numraid);
1583 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1584 }
1585 rs = &raid_softc[unit];
1586
1587 /* XXX is this the right place? */
1588 disk_busy(&rs->sc_dkdev);
1589
1590 bp = req->bp;
1591 #if 1
1592 /* XXX when there is a physical disk failure, someone is passing us a
1593 * buffer that contains old stuff!! Attempt to deal with this problem
1594 * without taking a performance hit... (not sure where the real bug
1595 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1596
1597 if (bp->b_flags & B_ERROR) {
1598 bp->b_flags &= ~B_ERROR;
1599 }
1600 if (bp->b_error != 0) {
1601 bp->b_error = 0;
1602 }
1603 #endif
1604 raidbp = RAIDGETBUF(rs);
1605
1606 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1607
1608 /*
1609 * context for raidiodone
1610 */
1611 raidbp->rf_obp = bp;
1612 raidbp->req = req;
1613
1614 switch (req->type) {
1615 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1616 /* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
1617 * queue->row, queue->col); */
1618 /* XXX need to do something extra here.. */
1619 /* I'm leaving this in, as I've never actually seen it used,
1620 * and I'd like folks to report it... GO */
1621 printf(("WAKEUP CALLED\n"));
1622 queue->numOutstanding++;
1623
1624 /* XXX need to glue the original buffer into this?? */
1625
1626 KernelWakeupFunc(&raidbp->rf_buf);
1627 break;
1628
1629 case RF_IO_TYPE_READ:
1630 case RF_IO_TYPE_WRITE:
1631
1632 if (req->tracerec) {
1633 RF_ETIMER_START(req->tracerec->timer);
1634 }
1635 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1636 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1637 req->sectorOffset, req->numSector,
1638 req->buf, KernelWakeupFunc, (void *) req,
1639 queue->raidPtr->logBytesPerSector, req->b_proc);
1640
1641 if (rf_debugKernelAccess) {
1642 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1643 (long) bp->b_blkno));
1644 }
1645 queue->numOutstanding++;
1646 queue->last_deq_sector = req->sectorOffset;
1647 /* acc wouldn't have been let in if there were any pending
1648 * reqs at any other priority */
1649 queue->curPriority = req->priority;
1650 /* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
1651 * req->type, queue->row, queue->col); */
1652
1653 db1_printf(("Going for %c to unit %d row %d col %d\n",
1654 req->type, unit, queue->row, queue->col));
1655 db1_printf(("sector %d count %d (%d bytes) %d\n",
1656 (int) req->sectorOffset, (int) req->numSector,
1657 (int) (req->numSector <<
1658 queue->raidPtr->logBytesPerSector),
1659 (int) queue->raidPtr->logBytesPerSector));
1660 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1661 raidbp->rf_buf.b_vp->v_numoutput++;
1662 }
1663 VOP_STRATEGY(&raidbp->rf_buf);
1664
1665 break;
1666
1667 default:
1668 panic("bad req->type in rf_DispatchKernelIO");
1669 }
1670 db1_printf(("Exiting from DispatchKernelIO\n"));
1671 return (0);
1672 }
1673 /* this is the callback function associated with an I/O invoked from
1674 kernel code.
1675 */
1676 static void
1677 KernelWakeupFunc(vbp)
1678 struct buf *vbp;
1679 {
1680 RF_DiskQueueData_t *req = NULL;
1681 RF_DiskQueue_t *queue;
1682 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1683 struct buf *bp;
1684 struct raid_softc *rs;
1685 int unit;
1686 register int s;
1687
1688 s = splbio(); /* XXX */
1689 db1_printf(("recovering the request queue:\n"));
1690 req = raidbp->req;
1691
1692 bp = raidbp->rf_obp;
1693 #if 0
1694 db1_printf(("bp=0x%x\n", bp));
1695 #endif
1696
1697 queue = (RF_DiskQueue_t *) req->queue;
1698
1699 if (raidbp->rf_buf.b_flags & B_ERROR) {
1700 #if 0
1701 printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
1702 #endif
1703 bp->b_flags |= B_ERROR;
1704 bp->b_error = raidbp->rf_buf.b_error ?
1705 raidbp->rf_buf.b_error : EIO;
1706 }
1707 #if 0
1708 db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
1709 db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
1710 db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
1711 db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
1712 #endif
1713
1714 /* XXX methinks this could be wrong... */
1715 #if 1
1716 bp->b_resid = raidbp->rf_buf.b_resid;
1717 #endif
1718
1719 if (req->tracerec) {
1720 RF_ETIMER_STOP(req->tracerec->timer);
1721 RF_ETIMER_EVAL(req->tracerec->timer);
1722 RF_LOCK_MUTEX(rf_tracing_mutex);
1723 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1724 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1725 req->tracerec->num_phys_ios++;
1726 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1727 }
1728 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1729
1730 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1731
1732
1733 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1734 * ballistic, and mark the component as hosed... */
1735 #if 1
1736 if (bp->b_flags & B_ERROR) {
1737 /* Mark the disk as dead */
1738 /* but only mark it once... */
1739 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1740 rf_ds_optimal) {
1741 printf("raid%d: IO Error. Marking %s as failed.\n",
1742 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1743 queue->raidPtr->Disks[queue->row][queue->col].status =
1744 rf_ds_failed;
1745 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1746 queue->raidPtr->numFailures++;
1747 /* XXX here we should bump the version number for each component, and write that data out */
1748 } else { /* Disk is already dead... */
1749 /* printf("Disk already marked as dead!\n"); */
1750 }
1751
1752 }
1753 #endif
1754
1755 rs = &raid_softc[unit];
1756 RAIDPUTBUF(rs, raidbp);
1757
1758
1759 if (bp->b_resid == 0) {
1760 db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
1761 unit, bp->b_resid, bp->b_bcount));
1762 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1763 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1764 } else {
1765 db1_printf(("b_resid is still %ld\n", bp->b_resid));
1766 }
1767
1768 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1769 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1770 /* printf("Exiting KernelWakeupFunc\n"); */
1771
1772 splx(s); /* XXX */
1773 }
1774
1775
1776
1777 /*
1778 * initialize a buf structure for doing an I/O in the kernel.
1779 */
1780 static void
1781 InitBP(
1782 struct buf * bp,
1783 struct vnode * b_vp,
1784 unsigned rw_flag,
1785 dev_t dev,
1786 RF_SectorNum_t startSect,
1787 RF_SectorCount_t numSect,
1788 caddr_t buf,
1789 void (*cbFunc) (struct buf *),
1790 void *cbArg,
1791 int logBytesPerSector,
1792 struct proc * b_proc)
1793 {
1794 /* bp->b_flags = B_PHYS | rw_flag; */
1795 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1796 bp->b_bcount = numSect << logBytesPerSector;
1797 bp->b_bufsize = bp->b_bcount;
1798 bp->b_error = 0;
1799 bp->b_dev = dev;
1800 db1_printf(("bp->b_dev is %d\n", dev));
1801 bp->b_un.b_addr = buf;
1802 #if 0
1803 db1_printf(("bp->b_data=0x%x\n", bp->b_data));
1804 #endif
1805
1806 bp->b_blkno = startSect;
1807 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1808 db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
1809 if (bp->b_bcount == 0) {
1810 panic("bp->b_bcount is zero in InitBP!!\n");
1811 }
1812 bp->b_proc = b_proc;
1813 bp->b_iodone = cbFunc;
1814 bp->b_vp = b_vp;
1815
1816 }
1817 /* Extras... */
1818
#if 0
/*
 * Compiled-out placeholder version of rf_GetSpareTableFromDaemon():
 * prints a reminder and reports 1.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	printf("This is supposed to do something useful!!\n");	/* XXX */
	return (1);
}
#endif
1831
1832 static void
1833 raidgetdefaultlabel(raidPtr, rs, lp)
1834 RF_Raid_t *raidPtr;
1835 struct raid_softc *rs;
1836 struct disklabel *lp;
1837 {
1838 db1_printf(("Building a default label...\n"));
1839 bzero(lp, sizeof(*lp));
1840
1841 /* fabricate a label... */
1842 lp->d_secperunit = raidPtr->totalSectors;
1843 lp->d_secsize = raidPtr->bytesPerSector;
1844 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1845 lp->d_ntracks = 1;
1846 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1847 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1848
1849 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1850 lp->d_type = DTYPE_RAID;
1851 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1852 lp->d_rpm = 3600;
1853 lp->d_interleave = 1;
1854 lp->d_flags = 0;
1855
1856 lp->d_partitions[RAW_PART].p_offset = 0;
1857 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1858 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1859 lp->d_npartitions = RAW_PART + 1;
1860
1861 lp->d_magic = DISKMAGIC;
1862 lp->d_magic2 = DISKMAGIC;
1863 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1864
1865 }
1866 /*
1867 * Read the disklabel from the raid device. If one is not present, fake one
1868 * up.
1869 */
1870 static void
1871 raidgetdisklabel(dev)
1872 dev_t dev;
1873 {
1874 int unit = raidunit(dev);
1875 struct raid_softc *rs = &raid_softc[unit];
1876 char *errstring;
1877 struct disklabel *lp = rs->sc_dkdev.dk_label;
1878 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1879 RF_Raid_t *raidPtr;
1880
1881 db1_printf(("Getting the disklabel...\n"));
1882
1883 bzero(clp, sizeof(*clp));
1884
1885 raidPtr = raidPtrs[unit];
1886
1887 raidgetdefaultlabel(raidPtr, rs, lp);
1888
1889 /*
1890 * Call the generic disklabel extraction routine.
1891 */
1892 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1893 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1894 if (errstring)
1895 raidmakedisklabel(rs);
1896 else {
1897 int i;
1898 struct partition *pp;
1899
1900 /*
1901 * Sanity check whether the found disklabel is valid.
1902 *
1903 * This is necessary since total size of the raid device
1904 * may vary when an interleave is changed even though exactly
1905 * same componets are used, and old disklabel may used
1906 * if that is found.
1907 */
1908 if (lp->d_secperunit != rs->sc_size)
1909 printf("WARNING: %s: "
1910 "total sector size in disklabel (%d) != "
1911 "the size of raid (%ld)\n", rs->sc_xname,
1912 lp->d_secperunit, (long) rs->sc_size);
1913 for (i = 0; i < lp->d_npartitions; i++) {
1914 pp = &lp->d_partitions[i];
1915 if (pp->p_offset + pp->p_size > rs->sc_size)
1916 printf("WARNING: %s: end of partition `%c' "
1917 "exceeds the size of raid (%ld)\n",
1918 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1919 }
1920 }
1921
1922 }
1923 /*
1924 * Take care of things one might want to take care of in the event
1925 * that a disklabel isn't present.
1926 */
1927 static void
1928 raidmakedisklabel(rs)
1929 struct raid_softc *rs;
1930 {
1931 struct disklabel *lp = rs->sc_dkdev.dk_label;
1932 db1_printf(("Making a label..\n"));
1933
1934 /*
1935 * For historical reasons, if there's no disklabel present
1936 * the raw partition must be marked FS_BSDFFS.
1937 */
1938
1939 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1940
1941 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1942
1943 lp->d_checksum = dkcksum(lp);
1944 }
1945 /*
1946 * Lookup the provided name in the filesystem. If the file exists,
1947 * is a valid block device, and isn't being used by anyone else,
1948 * set *vpp to the file's vnode.
1949 * You'll find the original of this in ccd.c
1950 */
1951 int
1952 raidlookup(path, p, vpp)
1953 char *path;
1954 struct proc *p;
1955 struct vnode **vpp; /* result */
1956 {
1957 struct nameidata nd;
1958 struct vnode *vp;
1959 struct vattr va;
1960 int error;
1961
1962 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1963 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1964 #ifdef DEBUG
1965 printf("RAIDframe: vn_open returned %d\n", error);
1966 #endif
1967 return (error);
1968 }
1969 vp = nd.ni_vp;
1970 if (vp->v_usecount > 1) {
1971 VOP_UNLOCK(vp, 0);
1972 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1973 return (EBUSY);
1974 }
1975 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1976 VOP_UNLOCK(vp, 0);
1977 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1978 return (error);
1979 }
1980 /* XXX: eventually we should handle VREG, too. */
1981 if (va.va_type != VBLK) {
1982 VOP_UNLOCK(vp, 0);
1983 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1984 return (ENOTBLK);
1985 }
1986 VOP_UNLOCK(vp, 0);
1987 *vpp = vp;
1988 return (0);
1989 }
1990 /*
1991 * Wait interruptibly for an exclusive lock.
1992 *
1993 * XXX
1994 * Several drivers do this; it should be abstracted and made MP-safe.
1995 * (Hmm... where have we seen this warning before :-> GO )
1996 */
1997 static int
1998 raidlock(rs)
1999 struct raid_softc *rs;
2000 {
2001 int error;
2002
2003 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2004 rs->sc_flags |= RAIDF_WANTED;
2005 if ((error =
2006 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2007 return (error);
2008 }
2009 rs->sc_flags |= RAIDF_LOCKED;
2010 return (0);
2011 }
2012 /*
2013 * Unlock and wake up any waiters.
2014 */
2015 static void
2016 raidunlock(rs)
2017 struct raid_softc *rs;
2018 {
2019
2020 rs->sc_flags &= ~RAIDF_LOCKED;
2021 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2022 rs->sc_flags &= ~RAIDF_WANTED;
2023 wakeup(rs);
2024 }
2025 }
2026
2027
/*
 * Location and size of the component label area on each component,
 * both in bytes.  The read/write routines below divide the offset by
 * DEV_BSIZE to get the block number.
 */
#define RF_COMPONENT_INFO_OFFSET  16384	/* bytes */
#define RF_COMPONENT_INFO_SIZE    1024	/* bytes */
2030
2031 int
2032 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2033 {
2034 RF_ComponentLabel_t component_label;
2035 raidread_component_label(dev, b_vp, &component_label);
2036 component_label.mod_counter = mod_counter;
2037 component_label.clean = RF_RAID_CLEAN;
2038 raidwrite_component_label(dev, b_vp, &component_label);
2039 return(0);
2040 }
2041
2042
2043 int
2044 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2045 {
2046 RF_ComponentLabel_t component_label;
2047 raidread_component_label(dev, b_vp, &component_label);
2048 component_label.mod_counter = mod_counter;
2049 component_label.clean = RF_RAID_DIRTY;
2050 raidwrite_component_label(dev, b_vp, &component_label);
2051 return(0);
2052 }
2053
2054 /* ARGSUSED */
2055 int
2056 raidread_component_label(dev, b_vp, component_label)
2057 dev_t dev;
2058 struct vnode *b_vp;
2059 RF_ComponentLabel_t *component_label;
2060 {
2061 struct buf *bp;
2062 int error;
2063
2064 /* XXX should probably ensure that we don't try to do this if
2065 someone has changed rf_protected_sectors. */
2066
2067 /* get a block of the appropriate size... */
2068 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2069 bp->b_dev = dev;
2070
2071 /* get our ducks in a row for the read */
2072 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2073 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2074 bp->b_flags = B_BUSY | B_READ;
2075 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2076
2077 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2078
2079 error = biowait(bp);
2080
2081 if (!error) {
2082 memcpy(component_label, bp->b_un.b_addr,
2083 sizeof(RF_ComponentLabel_t));
2084 #if 0
2085 printf("raidread_component_label: got component label:\n");
2086 printf("Version: %d\n",component_label->version);
2087 printf("Serial Number: %d\n",component_label->serial_number);
2088 printf("Mod counter: %d\n",component_label->mod_counter);
2089 printf("Row: %d\n", component_label->row);
2090 printf("Column: %d\n", component_label->column);
2091 printf("Num Rows: %d\n", component_label->num_rows);
2092 printf("Num Columns: %d\n", component_label->num_columns);
2093 printf("Clean: %d\n", component_label->clean);
2094 printf("Status: %d\n", component_label->status);
2095 #endif
2096 } else {
2097 printf("Failed to read RAID component label!\n");
2098 }
2099
2100 bp->b_flags = B_INVAL | B_AGE;
2101 brelse(bp);
2102 return(error);
2103 }
2104 /* ARGSUSED */
2105 int
2106 raidwrite_component_label(dev, b_vp, component_label)
2107 dev_t dev;
2108 struct vnode *b_vp;
2109 RF_ComponentLabel_t *component_label;
2110 {
2111 struct buf *bp;
2112 int error;
2113
2114 /* get a block of the appropriate size... */
2115 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2116 bp->b_dev = dev;
2117
2118 /* get our ducks in a row for the write */
2119 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2120 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2121 bp->b_flags = B_BUSY | B_WRITE;
2122 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2123
2124 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2125
2126 memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));
2127
2128 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2129 error = biowait(bp);
2130 bp->b_flags = B_INVAL | B_AGE;
2131 brelse(bp);
2132 if (error) {
2133 printf("Failed to write RAID component info!\n");
2134 }
2135
2136 return(error);
2137 }
2138
2139 void
2140 rf_markalldirty( raidPtr )
2141 RF_Raid_t *raidPtr;
2142 {
2143 RF_ComponentLabel_t c_label;
2144 int r,c;
2145
2146 raidPtr->mod_counter++;
2147 for (r = 0; r < raidPtr->numRow; r++) {
2148 for (c = 0; c < raidPtr->numCol; c++) {
2149 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2150 raidread_component_label(
2151 raidPtr->Disks[r][c].dev,
2152 raidPtr->raid_cinfo[r][c].ci_vp,
2153 &c_label);
2154 if (c_label.status == rf_ds_spared) {
2155 /* XXX do something special...
2156 but whatever you do, don't
2157 try to access it!! */
2158 } else {
2159 #if 0
2160 c_label.status =
2161 raidPtr->Disks[r][c].status;
2162 raidwrite_component_label(
2163 raidPtr->Disks[r][c].dev,
2164 raidPtr->raid_cinfo[r][c].ci_vp,
2165 &c_label);
2166 #endif
2167 raidmarkdirty(
2168 raidPtr->Disks[r][c].dev,
2169 raidPtr->raid_cinfo[r][c].ci_vp,
2170 raidPtr->mod_counter);
2171 }
2172 }
2173 }
2174 }
2175 /* printf("Component labels marked dirty.\n"); */
2176 #if 0
2177 for( c = 0; c < raidPtr->numSpare ; c++) {
2178 sparecol = raidPtr->numCol + c;
2179 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2180 /*
2181
2182 XXX this is where we get fancy and map this spare
2183 into it's correct spot in the array.
2184
2185 */
2186 /*
2187
2188 we claim this disk is "optimal" if it's
2189 rf_ds_used_spare, as that means it should be
2190 directly substitutable for the disk it replaced.
2191 We note that too...
2192
2193 */
2194
2195 for(i=0;i<raidPtr->numRow;i++) {
2196 for(j=0;j<raidPtr->numCol;j++) {
2197 if ((raidPtr->Disks[i][j].spareRow ==
2198 r) &&
2199 (raidPtr->Disks[i][j].spareCol ==
2200 sparecol)) {
2201 srow = r;
2202 scol = sparecol;
2203 break;
2204 }
2205 }
2206 }
2207
2208 raidread_component_label(
2209 raidPtr->Disks[r][sparecol].dev,
2210 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2211 &c_label);
2212 /* make sure status is noted */
2213 c_label.version = RF_COMPONENT_LABEL_VERSION;
2214 c_label.mod_counter = raidPtr->mod_counter;
2215 c_label.serial_number = raidPtr->serial_number;
2216 c_label.row = srow;
2217 c_label.column = scol;
2218 c_label.num_rows = raidPtr->numRow;
2219 c_label.num_columns = raidPtr->numCol;
2220 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2221 c_label.status = rf_ds_optimal;
2222 raidwrite_component_label(
2223 raidPtr->Disks[r][sparecol].dev,
2224 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2225 &c_label);
2226 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2227 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2228 }
2229 }
2230
2231 #endif
2232 }
2233
2234
2235 void
2236 rf_update_component_labels( raidPtr )
2237 RF_Raid_t *raidPtr;
2238 {
2239 RF_ComponentLabel_t c_label;
2240 int sparecol;
2241 int r,c;
2242 int i,j;
2243 int srow, scol;
2244
2245 srow = -1;
2246 scol = -1;
2247
2248 /* XXX should do extra checks to make sure things really are clean,
2249 rather than blindly setting the clean bit... */
2250
2251 raidPtr->mod_counter++;
2252
2253 for (r = 0; r < raidPtr->numRow; r++) {
2254 for (c = 0; c < raidPtr->numCol; c++) {
2255 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2256 raidread_component_label(
2257 raidPtr->Disks[r][c].dev,
2258 raidPtr->raid_cinfo[r][c].ci_vp,
2259 &c_label);
2260 /* make sure status is noted */
2261 c_label.status = rf_ds_optimal;
2262 raidwrite_component_label(
2263 raidPtr->Disks[r][c].dev,
2264 raidPtr->raid_cinfo[r][c].ci_vp,
2265 &c_label);
2266 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2267 raidmarkclean(
2268 raidPtr->Disks[r][c].dev,
2269 raidPtr->raid_cinfo[r][c].ci_vp,
2270 raidPtr->mod_counter);
2271 }
2272 }
2273 /* else we don't touch it.. */
2274 #if 0
2275 else if (raidPtr->Disks[r][c].status !=
2276 rf_ds_failed) {
2277 raidread_component_label(
2278 raidPtr->Disks[r][c].dev,
2279 raidPtr->raid_cinfo[r][c].ci_vp,
2280 &c_label);
2281 /* make sure status is noted */
2282 c_label.status =
2283 raidPtr->Disks[r][c].status;
2284 raidwrite_component_label(
2285 raidPtr->Disks[r][c].dev,
2286 raidPtr->raid_cinfo[r][c].ci_vp,
2287 &c_label);
2288 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2289 raidmarkclean(
2290 raidPtr->Disks[r][c].dev,
2291 raidPtr->raid_cinfo[r][c].ci_vp,
2292 raidPtr->mod_counter);
2293 }
2294 }
2295 #endif
2296 }
2297 }
2298
2299 for( c = 0; c < raidPtr->numSpare ; c++) {
2300 sparecol = raidPtr->numCol + c;
2301 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2302 /*
2303
2304 we claim this disk is "optimal" if it's
2305 rf_ds_used_spare, as that means it should be
2306 directly substitutable for the disk it replaced.
2307 We note that too...
2308
2309 */
2310
2311 for(i=0;i<raidPtr->numRow;i++) {
2312 for(j=0;j<raidPtr->numCol;j++) {
2313 if ((raidPtr->Disks[i][j].spareRow ==
2314 0) &&
2315 (raidPtr->Disks[i][j].spareCol ==
2316 sparecol)) {
2317 srow = i;
2318 scol = j;
2319 break;
2320 }
2321 }
2322 }
2323
2324 raidread_component_label(
2325 raidPtr->Disks[0][sparecol].dev,
2326 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2327 &c_label);
2328 /* make sure status is noted */
2329 c_label.version = RF_COMPONENT_LABEL_VERSION;
2330 c_label.mod_counter = raidPtr->mod_counter;
2331 c_label.serial_number = raidPtr->serial_number;
2332 c_label.row = srow;
2333 c_label.column = scol;
2334 c_label.num_rows = raidPtr->numRow;
2335 c_label.num_columns = raidPtr->numCol;
2336 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2337 c_label.status = rf_ds_optimal;
2338 raidwrite_component_label(
2339 raidPtr->Disks[0][sparecol].dev,
2340 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2341 &c_label);
2342 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2343 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2344 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2345 raidPtr->mod_counter);
2346 }
2347 }
2348 }
2349 /* printf("Component labels updated\n"); */
2350 }
2351