rf_netbsdkintf.c revision 1.13 1 /* $NetBSD: rf_netbsdkintf.c,v 1.13 1999/03/09 02:59:25 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
154 int rf_kdebug_level = 0;
155
156 #define RFK_BOOT_NONE 0
157 #define RFK_BOOT_GOOD 1
158 #define RFK_BOOT_BAD 2
159 static int rf_kbooted = RFK_BOOT_NONE;
160
161 #ifdef DEBUG
162 #define db0_printf(a) printf a
163 #define db_printf(a) if (rf_kdebug_level > 0) printf a
164 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
165 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
166 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
167 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
168 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
169 #else /* DEBUG */
170 #define db0_printf(a) printf a
171 #define db1_printf(a) { }
172 #define db2_printf(a) { }
173 #define db3_printf(a) { }
174 #define db4_printf(a) { }
175 #define db5_printf(a) { }
176 #endif /* DEBUG */
177
178 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
179
180 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
181
182 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
183 * spare table */
184 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
185 * installation process */
186
187 static struct rf_recon_req *recon_queue = NULL; /* used to communicate
188 * reconstruction
189 * requests */
190
191
192 decl_simple_lock_data(, recon_queue_mutex)
193 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
194 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
195
196 /* prototypes */
197 static void KernelWakeupFunc(struct buf * bp);
198 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
199 dev_t dev, RF_SectorNum_t startSect,
200 RF_SectorCount_t numSect, caddr_t buf,
201 void (*cbFunc) (struct buf *), void *cbArg,
202 int logBytesPerSector, struct proc * b_proc);
203
204 #define Dprintf0(s) if (rf_queueDebug) \
205 rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
206 #define Dprintf1(s,a) if (rf_queueDebug) \
207 rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
208 #define Dprintf2(s,a,b) if (rf_queueDebug) \
209 rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
210 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
211 rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
212
213 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
214 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
215
216 void raid_shutdown(void *);
217
218 void raidattach __P((int));
219 int raidsize __P((dev_t));
220
221 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
222 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
223 static int raidinit __P((dev_t, RF_Raid_t *, int));
224
225 int raidopen __P((dev_t, int, int, struct proc *));
226 int raidclose __P((dev_t, int, int, struct proc *));
227 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
228 int raidwrite __P((dev_t, struct uio *, int));
229 int raidread __P((dev_t, struct uio *, int));
230 void raidstrategy __P((struct buf *));
231 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
232
233 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
234 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
235 void rf_update_component_labels( RF_Raid_t *);
236 /*
237 * Pilfered from ccd.c
238 */
239
/*
 * Per-component I/O descriptor (pattern borrowed from ccd.c): wraps the
 * new buf handed to a component driver and remembers the original buf
 * and the RAIDframe queue request it services.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!!
				 * (code elsewhere casts a struct buf *
				 * back to a struct raidbuf *) */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int rf_flags;		/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
246
247
248 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
249 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
250
251 /* XXX Not sure if the following should be replacing the raidPtrs above,
252 or if it should be used in conjunction with that... */
253
/*
 * Per-unit software state for a RAID pseudo-device: generic disk
 * bookkeeping plus the component-buffer pool.  Indexed by unit number
 * in the raid_softc[] array allocated in raidattach().
 */
struct raid_softc {
	int sc_flags;		/* flags (RAIDF_* below) */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	dev_t sc_dev;		/* our device.. */
	void *sc_sdhook;	/* our shutdown hook */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
270
271 #define raidunit(x) DISKUNIT(x)
272 static int numraid = 0;
273
274 #define RAIDLABELDEV(dev) \
275 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
276
277 /* declared here, and made public, for the benefit of KVM stuff.. */
278 struct raid_softc *raid_softc;
279
280 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
281 struct disklabel *));
282 static void raidgetdisklabel __P((dev_t));
283 static void raidmakedisklabel __P((struct raid_softc *));
284
285 static int raidlock __P((struct raid_softc *));
286 static void raidunlock __P((struct raid_softc *));
287 int raidlookup __P((char *, struct proc * p, struct vnode **));
288
289 static void rf_markalldirty __P((RF_Raid_t *));
290
291 void
292 raidattach(num)
293 int num;
294 {
295 int raidID;
296
297 #ifdef DEBUG
298 printf("raidattach: Asked for %d units\n", num);
299 #endif
300
301 if (num <= 0) {
302 #ifdef DIAGNOSTIC
303 panic("raidattach: count <= 0");
304 #endif
305 return;
306 }
307 /* This is where all the initialization stuff gets done. */
308
309 /* Make some space for requested number of units... */
310
311 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
312 if (raidPtrs == NULL) {
313 panic("raidPtrs is NULL!!\n");
314 }
315 rf_kbooted = rf_boot();
316 if (rf_kbooted) {
317 panic("Serious error booting RAID!!\n");
318 }
319 rf_kbooted = RFK_BOOT_GOOD;
320
321 /* put together some datastructures like the CCD device does.. This
322 * lets us lock the device and what-not when it gets opened. */
323
324 raid_softc = (struct raid_softc *)
325 malloc(num * sizeof(struct raid_softc),
326 M_RAIDFRAME, M_NOWAIT);
327 if (raid_softc == NULL) {
328 printf("WARNING: no memory for RAIDframe driver\n");
329 return;
330 }
331 numraid = num;
332 bzero(raid_softc, num * sizeof(struct raid_softc));
333
334 for (raidID = 0; raidID < num; raidID++) {
335 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
336 (RF_Raid_t *));
337 if (raidPtrs[raidID] == NULL) {
338 printf("raidPtrs[%d] is NULL\n", raidID);
339 }
340 }
341 }
342
343
344 int
345 raidsize(dev)
346 dev_t dev;
347 {
348 struct raid_softc *rs;
349 struct disklabel *lp;
350 int part, unit, omask, size;
351
352 unit = raidunit(dev);
353 if (unit >= numraid)
354 return (-1);
355 rs = &raid_softc[unit];
356
357 if ((rs->sc_flags & RAIDF_INITED) == 0)
358 return (-1);
359
360 part = DISKPART(dev);
361 omask = rs->sc_dkdev.dk_openmask & (1 << part);
362 lp = rs->sc_dkdev.dk_label;
363
364 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
365 return (-1);
366
367 if (lp->d_partitions[part].p_fstype != FS_SWAP)
368 size = -1;
369 else
370 size = lp->d_partitions[part].p_size *
371 (lp->d_secsize / DEV_BSIZE);
372
373 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
374 return (-1);
375
376 return (size);
377
378 }
379
380 int
381 raiddump(dev, blkno, va, size)
382 dev_t dev;
383 daddr_t blkno;
384 caddr_t va;
385 size_t size;
386 {
387 /* Not implemented. */
388 return ENXIO;
389 }
/*
 * raidopen -- open entry point for the raid pseudo-device.
 *
 * Validates the unit and partition, serializes against configuration
 * changes via raidlock(), reads the disklabel on the first open of a
 * configured unit, records the open in the per-mode open masks, and on
 * the very first open of a configured set marks all component labels
 * dirty (so an unclean shutdown is detectable).
 */
/* ARGSUSED */
int
raidopen(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	unsigned int raidID;
	int rc;
	int error = 0;

	/* This whole next chunk of code is somewhat suspect... Not sure it's
	 * needed here at all... XXX */

	if (rf_kbooted == RFK_BOOT_NONE) {
		/* RAIDframe core never booted (raidattach skipped?) --
		 * try to boot it now. */
		printf("Doing restart on raidopen.\n");
		rf_kbooted = RFK_BOOT_GOOD;
		rc = rf_boot();
		if (rc) {
			rf_kbooted = RFK_BOOT_BAD;
			printf("Someone is unhappy...\n");
			return (rc);
		}
	}
	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Hold the unit lock so the set can't be unconfigured under us. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	/* lp points at the in-core label; raidgetdisklabel() below fills
	 * it in place, so fetching it here is safe. */
	lp = rs->sc_dkdev.dk_label;

	raidID = raidunit(dev);

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
	    raidID, part));


	/* First open of a configured unit: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		/* Non-raw opens require a configured set and a partition
		 * that exists in the label and is in use. */
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	/* dk_openmask is still the pre-open mask here: zero means this is
	 * the very first open of the unit. */
	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
486 /* ARGSUSED */
487 int
488 raidclose(dev, flags, fmt, p)
489 dev_t dev;
490 int flags, fmt;
491 struct proc *p;
492 {
493 int unit = raidunit(dev);
494 struct raid_softc *rs;
495 int error = 0;
496 int part;
497
498 if (unit >= numraid)
499 return (ENXIO);
500 rs = &raid_softc[unit];
501
502 if ((error = raidlock(rs)) != 0)
503 return (error);
504
505 part = DISKPART(dev);
506
507 /* ...that much closer to allowing unconfiguration... */
508 switch (fmt) {
509 case S_IFCHR:
510 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
511 break;
512
513 case S_IFBLK:
514 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
515 break;
516 }
517 rs->sc_dkdev.dk_openmask =
518 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
519
520 if ((rs->sc_dkdev.dk_openmask == 0) &&
521 ((rs->sc_flags & RAIDF_INITED) != 0)) {
522 /* Last one... device is not unconfigured yet.
523 Device shutdown has taken care of setting the
524 clean bits if RAIDF_INITED is not set
525 mark things as clean... */
526 rf_update_component_labels( raidPtrs[unit] );
527 }
528
529 raidunlock(rs);
530 return (0);
531
532 }
533
534 void
535 raidstrategy(bp)
536 register struct buf *bp;
537 {
538 register int s;
539
540 unsigned int raidID = raidunit(bp->b_dev);
541 RF_Raid_t *raidPtr;
542 struct raid_softc *rs = &raid_softc[raidID];
543 struct disklabel *lp;
544 int wlabel;
545
546 #if 0
547 db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
548 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
549 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
550 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
551 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
552
553 if (bp->b_flags & B_READ)
554 db1_printf(("READ\n"));
555 else
556 db1_printf(("WRITE\n"));
557 #endif
558 if (rf_kbooted != RFK_BOOT_GOOD)
559 return;
560 if (raidID >= numraid || !raidPtrs[raidID]) {
561 bp->b_error = ENODEV;
562 bp->b_flags |= B_ERROR;
563 bp->b_resid = bp->b_bcount;
564 biodone(bp);
565 return;
566 }
567 raidPtr = raidPtrs[raidID];
568 if (!raidPtr->valid) {
569 bp->b_error = ENODEV;
570 bp->b_flags |= B_ERROR;
571 bp->b_resid = bp->b_bcount;
572 biodone(bp);
573 return;
574 }
575 if (bp->b_bcount == 0) {
576 db1_printf(("b_bcount is zero..\n"));
577 biodone(bp);
578 return;
579 }
580 lp = rs->sc_dkdev.dk_label;
581
582 /*
583 * Do bounds checking and adjust transfer. If there's an
584 * error, the bounds check will flag that for us.
585 */
586
587 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
588 if (DISKPART(bp->b_dev) != RAW_PART)
589 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
590 db1_printf(("Bounds check failed!!:%d %d\n",
591 (int) bp->b_blkno, (int) wlabel));
592 biodone(bp);
593 return;
594 }
595 s = splbio(); /* XXX Needed? */
596 db1_printf(("Beginning strategy...\n"));
597
598 bp->b_resid = 0;
599 bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
600 NULL, NULL, NULL);
601 if (bp->b_error) {
602 bp->b_flags |= B_ERROR;
603 db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
604 bp->b_error));
605 }
606 splx(s);
607 #if 0
608 db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
609 bp, bp->b_data,
610 (int) bp->b_bcount, (int) bp->b_resid));
611 #endif
612 }
613 /* ARGSUSED */
614 int
615 raidread(dev, uio, flags)
616 dev_t dev;
617 struct uio *uio;
618 int flags;
619 {
620 int unit = raidunit(dev);
621 struct raid_softc *rs;
622 int part;
623
624 if (unit >= numraid)
625 return (ENXIO);
626 rs = &raid_softc[unit];
627
628 if ((rs->sc_flags & RAIDF_INITED) == 0)
629 return (ENXIO);
630 part = DISKPART(dev);
631
632 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
633
634 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
635
636 }
637 /* ARGSUSED */
638 int
639 raidwrite(dev, uio, flags)
640 dev_t dev;
641 struct uio *uio;
642 int flags;
643 {
644 int unit = raidunit(dev);
645 struct raid_softc *rs;
646
647 if (unit >= numraid)
648 return (ENXIO);
649 rs = &raid_softc[unit];
650
651 if ((rs->sc_flags & RAIDF_INITED) == 0)
652 return (ENXIO);
653 db1_printf(("raidwrite\n"));
654 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
655
656 }
657
658 int
659 raidioctl(dev, cmd, data, flag, p)
660 dev_t dev;
661 u_long cmd;
662 caddr_t data;
663 int flag;
664 struct proc *p;
665 {
666 int unit = raidunit(dev);
667 int error = 0;
668 int part, pmask;
669 struct raid_softc *rs;
670 #if 0
671 int r, c;
672 #endif
673 /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
674
675 /* struct ccdbuf *cbp; */
676 /* struct raidbuf *raidbp; */
677 RF_Config_t *k_cfg, *u_cfg;
678 u_char *specific_buf;
679 int retcode = 0;
680 int row;
681 int column;
682 struct rf_recon_req *rrcopy, *rr;
683 RF_ComponentLabel_t *component_label;
684 RF_ComponentLabel_t ci_label;
685 RF_ComponentLabel_t **c_label_ptr;
686 RF_SingleComponent_t *sparePtr,*componentPtr;
687 RF_SingleComponent_t hot_spare;
688 RF_SingleComponent_t component;
689
690 if (unit >= numraid)
691 return (ENXIO);
692 rs = &raid_softc[unit];
693
694 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
695 (int) DISKPART(dev), (int) unit, (int) cmd));
696
697 /* Must be open for writes for these commands... */
698 switch (cmd) {
699 case DIOCSDINFO:
700 case DIOCWDINFO:
701 case DIOCWLABEL:
702 if ((flag & FWRITE) == 0)
703 return (EBADF);
704 }
705
706 /* Must be initialized for these... */
707 switch (cmd) {
708 case DIOCGDINFO:
709 case DIOCSDINFO:
710 case DIOCWDINFO:
711 case DIOCGPART:
712 case DIOCWLABEL:
713 case DIOCGDEFLABEL:
714 case RAIDFRAME_SHUTDOWN:
715 case RAIDFRAME_REWRITEPARITY:
716 case RAIDFRAME_GET_INFO:
717 case RAIDFRAME_RESET_ACCTOTALS:
718 case RAIDFRAME_GET_ACCTOTALS:
719 case RAIDFRAME_KEEP_ACCTOTALS:
720 case RAIDFRAME_GET_SIZE:
721 case RAIDFRAME_FAIL_DISK:
722 case RAIDFRAME_COPYBACK:
723 case RAIDFRAME_CHECKRECON:
724 case RAIDFRAME_GET_COMPONENT_LABEL:
725 case RAIDFRAME_SET_COMPONENT_LABEL:
726 case RAIDFRAME_ADD_HOT_SPARE:
727 case RAIDFRAME_REMOVE_HOT_SPARE:
728 case RAIDFRAME_INIT_LABELS:
729 case RAIDFRAME_REBUILD_IN_PLACE:
730 if ((rs->sc_flags & RAIDF_INITED) == 0)
731 return (ENXIO);
732 }
733
734 switch (cmd) {
735
736
737 /* configure the system */
738 case RAIDFRAME_CONFIGURE:
739
740 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
741 /* copy-in the configuration information */
742 /* data points to a pointer to the configuration structure */
743 u_cfg = *((RF_Config_t **) data);
744 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
745 if (k_cfg == NULL) {
746 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
747 return (ENOMEM);
748 }
749 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
750 sizeof(RF_Config_t));
751 if (retcode) {
752 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
753 retcode));
754 return (retcode);
755 }
756 /* allocate a buffer for the layout-specific data, and copy it
757 * in */
758 if (k_cfg->layoutSpecificSize) {
759 if (k_cfg->layoutSpecificSize > 10000) {
760 /* sanity check */
761 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
762 return (EINVAL);
763 }
764 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
765 (u_char *));
766 if (specific_buf == NULL) {
767 RF_Free(k_cfg, sizeof(RF_Config_t));
768 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
769 return (ENOMEM);
770 }
771 retcode = copyin(k_cfg->layoutSpecific,
772 (caddr_t) specific_buf,
773 k_cfg->layoutSpecificSize);
774 if (retcode) {
775 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
776 retcode));
777 return (retcode);
778 }
779 } else
780 specific_buf = NULL;
781 k_cfg->layoutSpecific = specific_buf;
782
783 /* should do some kind of sanity check on the configuration.
784 * Store the sum of all the bytes in the last byte? */
785
786 #if 0
787 db1_printf(("Considering configuring the system.:%d 0x%x\n",
788 unit, p));
789 #endif
790
791 /* We need the pointer to this a little deeper, so stash it
792 * here... */
793
794 raidPtrs[unit]->proc = p;
795
796 /* configure the system */
797
798 raidPtrs[unit]->raidid = unit;
799 retcode = rf_Configure(raidPtrs[unit], k_cfg);
800
801
802 if (retcode == 0) {
803 retcode = raidinit(dev, raidPtrs[unit], unit);
804 rf_markalldirty( raidPtrs[unit] );
805 #if 0
806 /* register our shutdown hook */
807 if ((rs->sc_sdhook =
808 shutdownhook_establish(raid_shutdown,
809 raidPtrs[unit])) == NULL) {
810 printf("raid%d: WARNING: unable to establish shutdown hook\n",raidPtrs[unit]->raidid);
811 }
812 #endif
813
814 }
815 /* free the buffers. No return code here. */
816 if (k_cfg->layoutSpecificSize) {
817 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
818 }
819 RF_Free(k_cfg, sizeof(RF_Config_t));
820
821 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
822 retcode));
823
824 return (retcode);
825
826 /* shutdown the system */
827 case RAIDFRAME_SHUTDOWN:
828
829 if ((error = raidlock(rs)) != 0)
830 return (error);
831
832 /*
833 * If somebody has a partition mounted, we shouldn't
834 * shutdown.
835 */
836
837 part = DISKPART(dev);
838 pmask = (1 << part);
839 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
840 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
841 (rs->sc_dkdev.dk_copenmask & pmask))) {
842 raidunlock(rs);
843 return (EBUSY);
844 }
845
846 if (rf_debugKernelAccess) {
847 printf("call shutdown\n");
848 }
849 raidPtrs[unit]->proc = p; /* XXX necessary evil */
850
851 retcode = rf_Shutdown(raidPtrs[unit]);
852
853 db1_printf(("Done main shutdown\n"));
854
855 pool_destroy(&rs->sc_cbufpool);
856 db1_printf(("Done freeing component buffer freelist\n"));
857
858 /* It's no longer initialized... */
859 rs->sc_flags &= ~RAIDF_INITED;
860 #if 0
861 shutdownhook_disestablish( rs->sc_sdhook );
862 rs->sc_sdhook = NULL;
863 #endif
864 /* Detach the disk. */
865 disk_detach(&rs->sc_dkdev);
866
867 raidunlock(rs);
868
869 return (retcode);
870 case RAIDFRAME_GET_COMPONENT_LABEL:
871 c_label_ptr = (RF_ComponentLabel_t **) data;
872 /* need to read the component label for the disk indicated
873 by row,column in component_label
874 XXX need to sanity check these values!!!
875 */
876
877 /* For practice, let's get it directly fromdisk, rather
878 than from the in-core copy */
879 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
880 (RF_ComponentLabel_t *));
881 if (component_label == NULL)
882 return (ENOMEM);
883
884 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
885
886 retcode = copyin( *c_label_ptr, component_label,
887 sizeof(RF_ComponentLabel_t));
888
889 if (retcode) {
890 return(retcode);
891 }
892
893 row = component_label->row;
894 printf("Row: %d\n",row);
895 if (row > raidPtrs[unit]->numRow) {
896 row = 0; /* XXX */
897 }
898 column = component_label->column;
899 printf("Column: %d\n",column);
900 if (column > raidPtrs[unit]->numCol) {
901 column = 0; /* XXX */
902 }
903
904 raidread_component_label(
905 raidPtrs[unit]->Disks[row][column].dev,
906 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
907 component_label );
908
909 retcode = copyout((caddr_t) component_label,
910 (caddr_t) *c_label_ptr,
911 sizeof(RF_ComponentLabel_t));
912 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
913 return (retcode);
914
915 case RAIDFRAME_SET_COMPONENT_LABEL:
916 component_label = (RF_ComponentLabel_t *) data;
917
918 /* XXX check the label for valid stuff... */
919 /* Note that some things *should not* get modified --
920 the user should be re-initing the labels instead of
921 trying to patch things.
922 */
923
924 printf("Got component label:\n");
925 printf("Version: %d\n",component_label->version);
926 printf("Serial Number: %d\n",component_label->serial_number);
927 printf("Mod counter: %d\n",component_label->mod_counter);
928 printf("Row: %d\n", component_label->row);
929 printf("Column: %d\n", component_label->column);
930 printf("Num Rows: %d\n", component_label->num_rows);
931 printf("Num Columns: %d\n", component_label->num_columns);
932 printf("Clean: %d\n", component_label->clean);
933 printf("Status: %d\n", component_label->status);
934
935 row = component_label->row;
936 column = component_label->column;
937
938 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
939 (column < 0) || (column > raidPtrs[unit]->numCol)) {
940 return(EINVAL);
941 }
942
943 /* XXX this isn't allowed to do anything for now :-) */
944 #if 0
945 raidwrite_component_label(
946 raidPtrs[unit]->Disks[row][column].dev,
947 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
948 component_label );
949 #endif
950 return (0);
951
952 case RAIDFRAME_INIT_LABELS:
953 component_label = (RF_ComponentLabel_t *) data;
954 /*
955 we only want the serial number from
956 the above. We get all the rest of the information
957 from the config that was used to create this RAID
958 set.
959 */
960
961 raidPtrs[unit]->serial_number = component_label->serial_number;
962 /* current version number */
963 ci_label.version = RF_COMPONENT_LABEL_VERSION;
964 ci_label.serial_number = component_label->serial_number;
965 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
966 ci_label.num_rows = raidPtrs[unit]->numRow;
967 ci_label.num_columns = raidPtrs[unit]->numCol;
968 ci_label.clean = RF_RAID_DIRTY; /* not clean */
969 ci_label.status = rf_ds_optimal; /* "It's good!" */
970
971 for(row=0;row<raidPtrs[unit]->numRow;row++) {
972 ci_label.row = row;
973 for(column=0;column<raidPtrs[unit]->numCol;column++) {
974 ci_label.column = column;
975 raidwrite_component_label(
976 raidPtrs[unit]->Disks[row][column].dev,
977 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
978 &ci_label );
979 }
980 }
981
982 return (retcode);
983
984 /* initialize all parity */
985 case RAIDFRAME_REWRITEPARITY:
986
987 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0)
988 return (EINVAL);
989 /* borrow the thread of the requesting process */
990 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
991 retcode = rf_RewriteParity(raidPtrs[unit]);
992 /* return I/O Error if the parity rewrite fails */
993
994 if (retcode) {
995 retcode = EIO;
996 } else {
997 /* set the clean bit! If we shutdown correctly,
998 the clean bit on each component label will get
999 set */
1000 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
1001 }
1002 return (retcode);
1003
1004
1005 case RAIDFRAME_ADD_HOT_SPARE:
1006 sparePtr = (RF_SingleComponent_t *) data;
1007 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1008 printf("Adding spare\n");
1009 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
1010 return(retcode);
1011
1012 case RAIDFRAME_REMOVE_HOT_SPARE:
1013 return(retcode);
1014
1015 case RAIDFRAME_REBUILD_IN_PLACE:
1016 componentPtr = (RF_SingleComponent_t *) data;
1017 memcpy( &component, componentPtr,
1018 sizeof(RF_SingleComponent_t));
1019 row = component.row;
1020 column = component.column;
1021 printf("Rebuild: %d %d\n",row, column);
1022 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
1023 (column < 0) || (column > raidPtrs[unit]->numCol)) {
1024 return(EINVAL);
1025 }
1026 printf("Attempting a rebuild in place\n");
1027 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1028 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1029 return(retcode);
1030
1031 /* issue a test-unit-ready through raidframe to the indicated
1032 * device */
1033 #if 0 /* XXX not supported yet (ever?) */
1034 case RAIDFRAME_TUR:
1035 /* debug only */
1036 retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
1037 return (retcode);
1038 #endif
1039 case RAIDFRAME_GET_INFO:
1040 {
1041 RF_Raid_t *raid = raidPtrs[unit];
1042 RF_DeviceConfig_t *cfg, **ucfgp;
1043 int i, j, d;
1044
1045 if (!raid->valid)
1046 return (ENODEV);
1047 ucfgp = (RF_DeviceConfig_t **) data;
1048 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1049 (RF_DeviceConfig_t *));
1050 if (cfg == NULL)
1051 return (ENOMEM);
1052 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1053 cfg->rows = raid->numRow;
1054 cfg->cols = raid->numCol;
1055 cfg->ndevs = raid->numRow * raid->numCol;
1056 if (cfg->ndevs >= RF_MAX_DISKS) {
1057 cfg->ndevs = 0;
1058 return (ENOMEM);
1059 }
1060 cfg->nspares = raid->numSpare;
1061 if (cfg->nspares >= RF_MAX_DISKS) {
1062 cfg->nspares = 0;
1063 return (ENOMEM);
1064 }
1065 cfg->maxqdepth = raid->maxQueueDepth;
1066 d = 0;
1067 for (i = 0; i < cfg->rows; i++) {
1068 for (j = 0; j < cfg->cols; j++) {
1069 cfg->devs[d] = raid->Disks[i][j];
1070 d++;
1071 }
1072 }
1073 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1074 cfg->spares[i] = raid->Disks[0][j];
1075 }
1076 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1077 sizeof(RF_DeviceConfig_t));
1078 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1079
1080 return (retcode);
1081 }
1082 break;
1083
1084 case RAIDFRAME_RESET_ACCTOTALS:
1085 {
1086 RF_Raid_t *raid = raidPtrs[unit];
1087
1088 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1089 return (0);
1090 }
1091 break;
1092
1093 case RAIDFRAME_GET_ACCTOTALS:
1094 {
1095 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1096 RF_Raid_t *raid = raidPtrs[unit];
1097
1098 *totals = raid->acc_totals;
1099 return (0);
1100 }
1101 break;
1102
1103 case RAIDFRAME_KEEP_ACCTOTALS:
1104 {
1105 RF_Raid_t *raid = raidPtrs[unit];
1106 int *keep = (int *) data;
1107
1108 raid->keep_acc_totals = *keep;
1109 return (0);
1110 }
1111 break;
1112
1113 case RAIDFRAME_GET_SIZE:
1114 *(int *) data = raidPtrs[unit]->totalSectors;
1115 return (0);
1116
1117 #define RAIDFRAME_RECON 1
1118 /* XXX The above should probably be set somewhere else!! GO */
1119 #if RAIDFRAME_RECON > 0
1120
1121 /* fail a disk & optionally start reconstruction */
1122 case RAIDFRAME_FAIL_DISK:
1123 rr = (struct rf_recon_req *) data;
1124
1125 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1126 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1127 return (EINVAL);
1128
1129 printf("raid%d: Failing the disk: row: %d col: %d\n",
1130 unit, rr->row, rr->col);
1131
1132 /* make a copy of the recon request so that we don't rely on
1133 * the user's buffer */
1134 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1135 bcopy(rr, rrcopy, sizeof(*rr));
1136 rrcopy->raidPtr = (void *) raidPtrs[unit];
1137
1138 LOCK_RECON_Q_MUTEX();
1139 rrcopy->next = recon_queue;
1140 recon_queue = rrcopy;
1141 wakeup(&recon_queue);
1142 UNLOCK_RECON_Q_MUTEX();
1143
1144 return (0);
1145
1146 /* invoke a copyback operation after recon on whatever disk
1147 * needs it, if any */
1148 case RAIDFRAME_COPYBACK:
1149 /* borrow the current thread to get this done */
1150 raidPtrs[unit]->proc = p; /* ICK.. but needed :-p GO */
1151 rf_CopybackReconstructedData(raidPtrs[unit]);
1152 return (0);
1153
1154 /* return the percentage completion of reconstruction */
1155 case RAIDFRAME_CHECKRECON:
1156 row = *(int *) data;
1157 if (row < 0 || row >= raidPtrs[unit]->numRow)
1158 return (EINVAL);
1159 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1160 *(int *) data = 100;
1161 else
1162 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1163 return (0);
1164
1165 /* the sparetable daemon calls this to wait for the kernel to
1166 * need a spare table. this ioctl does not return until a
1167 * spare table is needed. XXX -- calling mpsleep here in the
1168 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1169 * -- I should either compute the spare table in the kernel,
1170 * or have a different -- XXX XXX -- interface (a different
1171 * character device) for delivering the table -- XXX */
1172 #if 0
1173 case RAIDFRAME_SPARET_WAIT:
1174 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1175 while (!rf_sparet_wait_queue)
1176 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1177 waitreq = rf_sparet_wait_queue;
1178 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1179 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1180
1181 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1182
1183 RF_Free(waitreq, sizeof(*waitreq));
1184 return (0);
1185
1186
1187 /* wakes up a process waiting on SPARET_WAIT and puts an error
1188 * code in it that will cause the dameon to exit */
1189 case RAIDFRAME_ABORT_SPARET_WAIT:
1190 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1191 waitreq->fcol = -1;
1192 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1193 waitreq->next = rf_sparet_wait_queue;
1194 rf_sparet_wait_queue = waitreq;
1195 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1196 wakeup(&rf_sparet_wait_queue);
1197 return (0);
1198
1199 /* used by the spare table daemon to deliver a spare table
1200 * into the kernel */
1201 case RAIDFRAME_SEND_SPARET:
1202
1203 /* install the spare table */
1204 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1205
1206 /* respond to the requestor. the return status of the spare
1207 * table installation is passed in the "fcol" field */
1208 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1209 waitreq->fcol = retcode;
1210 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1211 waitreq->next = rf_sparet_resp_queue;
1212 rf_sparet_resp_queue = waitreq;
1213 wakeup(&rf_sparet_resp_queue);
1214 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1215
1216 return (retcode);
1217 #endif
1218
1219
1220 #endif /* RAIDFRAME_RECON > 0 */
1221
1222 default:
1223 break; /* fall through to the os-specific code below */
1224
1225 }
1226
1227 if (!raidPtrs[unit]->valid)
1228 return (EINVAL);
1229
1230 /*
1231 * Add support for "regular" device ioctls here.
1232 */
1233
1234 switch (cmd) {
1235 case DIOCGDINFO:
1236 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1237 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1238 break;
1239
1240 case DIOCGPART:
1241 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1242 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1243 ((struct partinfo *) data)->part =
1244 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1245 break;
1246
1247 case DIOCWDINFO:
1248 db1_printf(("DIOCWDINFO\n"));
1249 case DIOCSDINFO:
1250 db1_printf(("DIOCSDINFO\n"));
1251 if ((error = raidlock(rs)) != 0)
1252 return (error);
1253
1254 rs->sc_flags |= RAIDF_LABELLING;
1255
1256 error = setdisklabel(rs->sc_dkdev.dk_label,
1257 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1258 if (error == 0) {
1259 if (cmd == DIOCWDINFO)
1260 error = writedisklabel(RAIDLABELDEV(dev),
1261 raidstrategy, rs->sc_dkdev.dk_label,
1262 rs->sc_dkdev.dk_cpulabel);
1263 }
1264 rs->sc_flags &= ~RAIDF_LABELLING;
1265
1266 raidunlock(rs);
1267
1268 if (error)
1269 return (error);
1270 break;
1271
1272 case DIOCWLABEL:
1273 db1_printf(("DIOCWLABEL\n"));
1274 if (*(int *) data != 0)
1275 rs->sc_flags |= RAIDF_WLABEL;
1276 else
1277 rs->sc_flags &= ~RAIDF_WLABEL;
1278 break;
1279
1280 case DIOCGDEFLABEL:
1281 db1_printf(("DIOCGDEFLABEL\n"));
1282 raidgetdefaultlabel(raidPtrs[unit], rs,
1283 (struct disklabel *) data);
1284 break;
1285
1286 default:
1287 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1288 }
1289 return (retcode);
1290
1291 }
1292
1293
1294 /* raidinit -- complete the rest of the initialization for the
1295 RAIDframe device. */
1296
1297
/*
 * raidinit: complete the kernel-side attachment of one configured
 * RAIDframe unit.
 *
 * dev     - device number of the raid device being configured
 * raidPtr - fully-configured RAIDframe descriptor for this unit
 * unit    - index into raid_softc[] / raidPtrs[]
 *
 * Returns 0 (retcode is never set to any other value here).
 */
static int
raidinit(dev, raidPtr, unit)
	dev_t dev;
	RF_Raid_t *raidPtr;
	int unit;
{
	int retcode;
	/* int ix; */
	/* struct raidbuf *raidbp; */
	struct raid_softc *rs;

	retcode = 0;

	rs = &raid_softc[unit];
	/* Per-unit pool of raidbuf shadow buffers used for component I/O
	 * (allocated/released via RAIDGETBUF/RAIDPUTBUF). */
	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
	    0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */

	rs->sc_dkdev.dk_name = rs->sc_xname;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe. */

	rs->sc_size = raidPtr->totalSectors;
	rs->sc_dev = dev;

	return (retcode);
}
1337
1338 void
1339 raid_shutdown(arg)
1340 void *arg;
1341 {
1342 RF_Raid_t *raidPtr = arg;
1343 struct raid_softc *rs;
1344
1345 /* This is called by out shutdown hook.
1346 The lights are being turned out, so lets shutdown as
1347 gracefully as possible */
1348
1349 rs = &raid_softc[raidPtr->raidid];
1350
1351 printf("raid%d: shutdown hooks called\n",raidPtr->raidid);
1352 rf_Shutdown(raidPtr);
1353
1354 /* It's no longer initialized... */
1355 rs->sc_flags &= ~RAIDF_INITED;
1356
1357
1358 }
1359
1360
1361 /*********************************************************
1362 *
1363 * initialization code called at boot time (startup.c)
1364 *
1365 ********************************************************/
1366 int
1367 rf_boot()
1368 {
1369 int i, rc;
1370
1371 rc = rf_mutex_init(&rf_sparet_wait_mutex);
1372 if (rc) {
1373 RF_PANIC();
1374 }
1375
1376 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
1377 recon_queue = NULL;
1378
1379 for (i = 0; i < numraid; i++)
1380 raidPtrs[i] = NULL;
1381 rc = rf_BootRaidframe();
1382 if (rc == 0)
1383 printf("Kernelized RAIDframe activated\n");
1384 else
1385 rf_kbooted = RFK_BOOT_BAD;
1386 return (rc);
1387 }
1388
1389 /*
1390 * This kernel thread never exits. It is created once, and persists
1391 * until the system reboots.
1392 */
1393
/*
 * rf_ReconKernelThread: body of the reconstruction kernel thread.
 * Loops forever pulling rf_recon_req entries off recon_queue (queued by
 * the RAIDFRAME_FAIL_DISK ioctl, which wakeup()s us) and failing --
 * and optionally reconstructing -- the indicated component.  Never
 * returns, so the splbio() below is never undone.
 */
void
rf_ReconKernelThread()
{
	struct rf_recon_req *req;
	int s;

	/* XXX not sure what spl() level we should be at here... probably
	 * splbio() */
	s = splbio();

	while (1) {
		/* grab the next reconstruction request from the queue */
		LOCK_RECON_Q_MUTEX();
		/* Re-test after every wakeup: the mutex is dropped around
		 * tsleep(), so the queue state must be re-checked. */
		while (!recon_queue) {
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO | PCATCH,
			       "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
		 * If flags specifies that we should start recon, this call
		 * will not return until reconstruction completes, fails,
		 * or is aborted.
		 */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

		/* The request was copied out of user space by the ioctl
		 * handler; we own it and must free it. */
		RF_Free(req, sizeof(*req));
	}
}
1428 /* wake up the daemon & tell it to get us a spare table
1429 * XXX
1430 * the entries in the queues should be tagged with the raidPtr
1431 * so that in the extremely rare case that two recons happen at once,
1432 * we know for which device were requesting a spare table
1433 * XXX
1434 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	/* Queue our request for the user-level sparetable daemon (which is
	 * blocked in RAIDFRAME_SPARET_WAIT) and wake it. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* Wait for the daemon's response (delivered via
	 * RAIDFRAME_SEND_SPARET onto rf_sparet_resp_queue).
	 * NOTE(review): the old mpsleep() released the mutex while asleep;
	 * tsleep() does not, so we now sleep holding
	 * rf_sparet_wait_mutex -- confirm this cannot deadlock against the
	 * ioctl side. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO | PCATCH,
		       "raidframe getsparetable", 0);
#if 0
		mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
		    (void *) simple_lock_addr(rf_sparet_wait_mutex),
		    MS_LOCK_SIMPLE);
#endif
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The daemon passes its status back in the fcol field. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1465 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1466 * bp & passes it down.
1467 * any calls originating in the kernel must use non-blocking I/O
1468 * do some extra sanity checking to return "appropriate" error values for
1469 * certain conditions (to make some standard utilities work)
1470 */
/*
 * rf_DoAccessKernel: wrapper around rf_DoAccess() for I/O originating
 * from a struct buf (the block-device strategy path).
 *
 * Translates bp->b_blkno (partition-relative) to an absolute RAID
 * address, bounds- and alignment-checks the request, and hands it to
 * RAIDframe as non-blocking I/O.  Reads go async; writes go sync and we
 * tsleep() on bp until completion.  Rejected requests are completed
 * here with B_ERROR set and the appropriate errno returned.
 */
int
rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
	RF_Raid_t *raidPtr;
	struct buf *bp;
	RF_RaidAccessFlags_t flags;
	void (*cbFunc) (struct buf *);
	void *cbArg;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;

	/* XXX The dev_t used here should be for /dev/[r]raid* !!! */

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Ok, for the bp we have here, bp->b_blkno is relative to the
	 * partition.. Need to make it absolute to the underlying device.. */

	blocknum = bp->b_blkno;
	if (DISKPART(bp->b_dev) != RAW_PART) {
		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
		blocknum += pp->p_offset;
		db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
			pp->p_offset));
	} else {
		db1_printf(("Is raw..\n"));
	}
	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to... but DO NOT
	 * TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/* num_blocks: whole sectors in the request; pb: 1 if there is a
	 * trailing partial sector.  sum is used for the bounds check below. */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	/* NOTE(review): the "1 ||" forces this debug block on
	 * unconditionally -- looks like a debugging leftover; confirm
	 * before removing. */
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			(int) raid_addr, (int) sum, (int) num_blocks,
			(int) pb, (int) bp->b_resid));
	}
	/* Reject requests past the end of the array; the "sum <" clauses
	 * catch arithmetic wrap-around. */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		bp->b_error = ENOSPC;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* Reject transfers that are not a multiple of the sector size. */
	if (bp->b_bcount & raidPtr->sectorMask) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	db1_printf(("Calling DoAccess..\n"));

	/*
	 * XXX For now, all writes are sync
	 */
	do_async = 1;
	if ((bp->b_flags & B_READ) == 0)
		do_async = 0;

	/* don't ever condition on bp->b_flags & B_WRITE.  always condition on
	 * B_READ instead */
	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
	    do_async, raid_addr, num_blocks,
	    bp->b_un.b_addr,
	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
	    NULL, cbFunc, cbArg);
#if 0
	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
		bp->b_data, (int) bp->b_resid));
#endif

	/*
	 * If we requested sync I/O, sleep here.
	 */
	if ((retcode == 0) && (do_async == 0))
		tsleep(bp, PRIBIO, "raidsyncio", 0);

	return (retcode);
}
1571 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1572
/*
 * rf_DispatchKernelIO: issue one RAIDframe disk-queue request to the
 * underlying component.  The disk queue should be locked upon entry.
 * NOP requests are completed immediately via KernelWakeupFunc();
 * reads/writes are wrapped in a raidbuf and sent down with
 * VOP_STRATEGY(), completing later in KernelWakeupFunc().
 * Always returns 0.
 */
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;

	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	/* Clear any stale error state on the incoming buffer. */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NOTE(review): the extra parens make this a plain printf of
		 * the string -- harmless, but likely meant to be db1_printf. */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* Complete the NOP immediately through the normal
		 * completion path. */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* Set up the shadow buf for the component transfer. */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* Writes must bump the vnode's pending-output count. */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
1684 /* this is the callback function associated with a I/O invoked from
1685 kernel code.
1686 */
/*
 * KernelWakeupFunc: b_iodone callback for component I/O issued by
 * rf_DispatchKernelIO().  Runs at splbio.  Recovers the request and the
 * parent buf from the enclosing raidbuf, propagates error/resid state,
 * updates trace timing, marks a failing component dead on its first
 * error, releases the raidbuf, and notifies RAIDframe that this disk
 * operation is complete.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* The component buf is the first member of the raidbuf, so the
	 * cast recovers our context structure. */
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* Propagate a component-level error to the original buf,
	 * defaulting to EIO if no specific errno was recorded. */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* Account the elapsed time for this physical I/O. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* Return the shadow buffer to the per-unit pool. */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* Hand the completed op back to the disk queue, then to the DAG
	 * engine via the request's completion function. */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
1785
1786
1787
1788 /*
1789 * initialize a buf structure for doing an I/O in the kernel.
1790 */
/*
 * InitBP: fill in a caller-allocated struct buf for a single component
 * I/O.  The buf completes through cbFunc (installed as b_iodone, with
 * B_CALL set).  startSect/numSect are in sectors of
 * 2^logBytesPerSector bytes.  Note that cbArg is not used here; the
 * callback recovers its context from the enclosing raidbuf instead.
 */
static void
InitBP(
    struct buf * bp,
    struct vnode * b_vp,
    unsigned rw_flag,
    dev_t dev,
    RF_SectorNum_t startSect,
    RF_SectorCount_t numSect,
    caddr_t buf,
    void (*cbFunc) (struct buf *),
    void *cbArg,
    int logBytesPerSector,
    struct proc * b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	db1_printf(("bp->b_dev is %d\n", dev));
	bp->b_un.b_addr = buf;
#if 0
	db1_printf(("bp->b_data=0x%x\n", bp->b_data));
#endif

	bp->b_blkno = startSect;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
	/* A zero-length transfer here indicates a caller bug. */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!\n");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_vp = b_vp;

}
1828 /* Extras... */
1829
unsigned int
rpcc()
{
	/*
	 * Stub for reading a CPU cycle counter.  Not implemented on this
	 * port; always reports zero.
	 */
	return (0);
}
#if 0
/*
 * Dead alternate stub of rf_GetSpareTableFromDaemon(); the live
 * implementation appears earlier in this file.  Compiled out -- kept
 * for reference only.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode = 1;
	printf("This is supposed to do something useful!!\n");	/* XXX */

	return (retcode);

}
#endif
1850
1851 static void
1852 raidgetdefaultlabel(raidPtr, rs, lp)
1853 RF_Raid_t *raidPtr;
1854 struct raid_softc *rs;
1855 struct disklabel *lp;
1856 {
1857 db1_printf(("Building a default label...\n"));
1858 bzero(lp, sizeof(*lp));
1859
1860 /* fabricate a label... */
1861 lp->d_secperunit = raidPtr->totalSectors;
1862 lp->d_secsize = raidPtr->bytesPerSector;
1863 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1864 lp->d_ntracks = 1;
1865 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1866 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1867
1868 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1869 lp->d_type = DTYPE_RAID;
1870 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1871 lp->d_rpm = 3600;
1872 lp->d_interleave = 1;
1873 lp->d_flags = 0;
1874
1875 lp->d_partitions[RAW_PART].p_offset = 0;
1876 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1877 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1878 lp->d_npartitions = RAW_PART + 1;
1879
1880 lp->d_magic = DISKMAGIC;
1881 lp->d_magic2 = DISKMAGIC;
1882 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1883
1884 }
1885 /*
1886 * Read the disklabel from the raid device. If one is not present, fake one
1887 * up.
1888 */
/*
 * raidgetdisklabel: load the disklabel for the given raid device into
 * the softc's dk_label, starting from a fabricated default.  If no
 * valid on-disk label is found, finish off the default with
 * raidmakedisklabel(); otherwise sanity-check the found label against
 * the actual size of the raid set and warn about mismatches.
 */
static void
raidgetdisklabel(dev)
	dev_t dev;
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* Start from a sane default in case nothing is on disk. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same componets are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%d)\n", rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%d)\n",
				    rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
1942 /*
1943 * Take care of things one might want to take care of in the event
1944 * that a disklabel isn't present.
1945 */
1946 static void
1947 raidmakedisklabel(rs)
1948 struct raid_softc *rs;
1949 {
1950 struct disklabel *lp = rs->sc_dkdev.dk_label;
1951 db1_printf(("Making a label..\n"));
1952
1953 /*
1954 * For historical reasons, if there's no disklabel present
1955 * the raw partition must be marked FS_BSDFFS.
1956 */
1957
1958 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1959
1960 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1961
1962 lp->d_checksum = dkcksum(lp);
1963 }
1964 /*
1965 * Lookup the provided name in the filesystem. If the file exists,
1966 * is a valid block device, and isn't being used by anyone else,
1967 * set *vpp to the file's vnode.
1968 * You'll find the original of this in ccd.c
1969 */
/*
 * raidlookup: open the component named by path (in the context of
 * proc p) and return its vnode in *vpp.  Fails with EBUSY if the vnode
 * is already in use, ENOTBLK if it is not a block device, or the error
 * from vn_open()/VOP_GETATTR().  On success the vnode is left open
 * (FREAD|FWRITE) but unlocked.
 */
int
raidlookup(path, p, vpp)
	char *path;
	struct proc *p;
	struct vnode **vpp;	/* result */
{
	struct nameidata nd;
	struct vnode *vp;
	struct vattr va;
	int error;

	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
#ifdef DEBUG
		printf("RAIDframe: vn_open returned %d\n", error);
#endif
		return (error);
	}
	vp = nd.ni_vp;
	/* Refuse components that someone else already has open. */
	if (vp->v_usecount > 1) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (EBUSY);
	}
	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (error);
	}
	/* XXX: eventually we should handle VREG, too. */
	if (va.va_type != VBLK) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (ENOTBLK);
	}
	/* Success: hand back the still-open, unlocked vnode. */
	VOP_UNLOCK(vp, 0);
	*vpp = vp;
	return (0);
}
2009 /*
2010 * Wait interruptibly for an exclusive lock.
2011 *
2012 * XXX
2013 * Several drivers do this; it should be abstracted and made MP-safe.
2014 * (Hmm... where have we seen this warning before :-> GO )
2015 */
2016 static int
2017 raidlock(rs)
2018 struct raid_softc *rs;
2019 {
2020 int error;
2021
2022 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2023 rs->sc_flags |= RAIDF_WANTED;
2024 if ((error =
2025 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2026 return (error);
2027 }
2028 rs->sc_flags |= RAIDF_LOCKED;
2029 return (0);
2030 }
2031 /*
2032 * Unlock and wake up any waiters.
2033 */
2034 static void
2035 raidunlock(rs)
2036 struct raid_softc *rs;
2037 {
2038
2039 rs->sc_flags &= ~RAIDF_LOCKED;
2040 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2041 rs->sc_flags &= ~RAIDF_WANTED;
2042 wakeup(rs);
2043 }
2044 }
2045
2046
2047 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2048 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2049
2050 int
2051 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2052 {
2053 RF_ComponentLabel_t component_label;
2054 raidread_component_label(dev, b_vp, &component_label);
2055 component_label.mod_counter = mod_counter;
2056 component_label.clean = RF_RAID_CLEAN;
2057 raidwrite_component_label(dev, b_vp, &component_label);
2058 return(0);
2059 }
2060
2061
2062 int
2063 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2064 {
2065 RF_ComponentLabel_t component_label;
2066 raidread_component_label(dev, b_vp, &component_label);
2067 component_label.mod_counter = mod_counter;
2068 component_label.clean = RF_RAID_DIRTY;
2069 raidwrite_component_label(dev, b_vp, &component_label);
2070 return(0);
2071 }
2072
2073 /* ARGSUSED */
/*
 * raidread_component_label: read the RAIDframe component label from the
 * reserved area at RF_COMPONENT_INFO_OFFSET on the given component.
 * b_vp is unused here (ARGSUSED); I/O goes straight through the block
 * device's strategy routine.  Returns 0 and fills *component_label on
 * success, or the error from biowait().
 */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_resid is set in DEV_BSIZE units here while
	 * b_bcount is in bytes -- confirm which unit the strategy routine
	 * expects. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* Issue the read directly via the device's strategy routine and
	 * wait for it to complete. */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* Mark the buffer stale so it is discarded on release. */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
2123 /* ARGSUSED */
/*
 * raidwrite_component_label: write *component_label to the reserved
 * area at RF_COMPONENT_INFO_OFFSET on the given component, zero-padding
 * the rest of the RF_COMPONENT_INFO_SIZE region.  b_vp is unused here
 * (ARGSUSED).  Returns 0 on success, or the error from biowait().
 */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	/* NOTE(review): b_resid is set in DEV_BSIZE units here while
	 * b_bcount is in bytes -- confirm which unit the strategy routine
	 * expects. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* Zero the whole region, then lay the label over the front. */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* Issue the write directly via the device's strategy routine and
	 * wait for it to complete. */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* Mark the buffer stale so it is discarded on release. */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2157
/*
 * Bump the array's modification counter and mark the component label of
 * every non-failed component dirty (via raidmarkdirty()).  Components
 * whose on-disk label says rf_ds_spared are skipped entirely.
 *
 * NOTE(review): presumably called when the array goes into service, so
 * that an unclean shutdown can later be detected -- confirm against the
 * callers; the visible code only shows the label updates themselves.
 */
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				/* fetch the current on-disk label so we can
				   check the component's recorded status */
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				/* stamp the label dirty with the new
				   mod_counter */
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
	/* NOTE(review): the disabled block below references variables
	   (sparecol, i, j, srow, scol) that are not declared in this
	   function -- it will not compile if simply re-enabled. */
#if 0
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
2252
2253
/*
 * Rewrite the component labels of all optimal components and all
 * in-use spares to reflect the array's current state, bumping the
 * modification counter first.  If the parity is known good
 * (parity_good == RF_RAID_CLEAN) each rewritten component is also
 * marked clean via raidmarkclean().
 *
 * For a used spare, the label is rebuilt from scratch: its (row,column)
 * fields record the slot of the failed disk it replaced, found by
 * scanning Disks[][].spareRow/spareCol.
 *
 * NOTE(review): spares are only handled for row 0 (Disks[0][sparecol],
 * spareRow == 0) -- presumably multi-row arrays with spares are not yet
 * supported here; confirm before relying on this for numRow > 1.
 */
void
rf_update_component_labels( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	/* sentinel values: remain -1 if no replaced slot is found below */
	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean, 
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				/* re-read, update status, and write back */
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status = rf_ds_optimal;
				raidwrite_component_label( 
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean( 
					      raidPtr->Disks[r][c].dev, 
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			} 
			/* else we don't touch it.. */
#if 0
			else if (raidPtr->Disks[r][c].status !=
				   rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status = 
					raidPtr->Disks[r][c].status;
				raidwrite_component_label( 
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean( 
					      raidPtr->Disks[r][c].dev, 
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
#endif
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/* 
			   
			   we claim this disk is "optimal" if it's 
			   rf_ds_used_spare, as that means it should be 
			   directly substitutable for the disk it replaced. 
			   We note that too...

			 */

			/* locate the slot this spare stands in for;
			   the break only exits the inner loop, but a
			   later re-match cannot occur for a unique
			   (spareRow, spareCol) pair, so srow/scol keep
			   the first hit */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow == 
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}
			
			/* rebuild the spare's label so it describes the
			   slot it now occupies */
			raidread_component_label( 
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION; 
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[0][sparecol].dev, 
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			if (raidPtr->parity_good == RF_RAID_CLEAN) {
				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
					      raidPtr->raid_cinfo[0][sparecol].ci_vp,
					      raidPtr->mod_counter);
			}
		}
	}
	/* printf("Component labels updated\n"); */
}
2370