rf_netbsdkintf.c revision 1.36 1 /* $NetBSD: rf_netbsdkintf.c,v 1.36 1999/12/15 02:02:16 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
154 int rf_kdebug_level = 0;
155
156 #ifdef DEBUG
157 #define db0_printf(a) printf a
158 #define db_printf(a) if (rf_kdebug_level > 0) printf a
159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
160 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
161 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
162 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
163 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
164 #else /* DEBUG */
165 #define db0_printf(a) printf a
166 #define db1_printf(a) { }
167 #define db2_printf(a) { }
168 #define db3_printf(a) { }
169 #define db4_printf(a) { }
170 #define db5_printf(a) { }
171 #endif /* DEBUG */
172
173 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
174
175 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
176
177 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
178 * spare table */
179 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
180 * installation process */
181
182 static struct rf_recon_req *recon_queue = NULL; /* used to communicate
183 * reconstruction
184 * requests */
185
186
187 decl_simple_lock_data(, recon_queue_mutex)
188 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
189 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
190
191 /* prototypes */
192 static void KernelWakeupFunc(struct buf * bp);
193 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
194 dev_t dev, RF_SectorNum_t startSect,
195 RF_SectorCount_t numSect, caddr_t buf,
196 void (*cbFunc) (struct buf *), void *cbArg,
197 int logBytesPerSector, struct proc * b_proc);
198
199 #define Dprintf0(s) if (rf_queueDebug) \
200 rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
201 #define Dprintf1(s,a) if (rf_queueDebug) \
202 rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
203 #define Dprintf2(s,a,b) if (rf_queueDebug) \
204 rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
205 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
206 rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
207
208 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
209 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
210
211 void raidattach __P((int));
212 int raidsize __P((dev_t));
213
214 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
215 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
216 static int raidinit __P((dev_t, RF_Raid_t *, int));
217
218 int raidopen __P((dev_t, int, int, struct proc *));
219 int raidclose __P((dev_t, int, int, struct proc *));
220 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
221 int raidwrite __P((dev_t, struct uio *, int));
222 int raidread __P((dev_t, struct uio *, int));
223 void raidstrategy __P((struct buf *));
224 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
225
226 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
227 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
228 void rf_update_component_labels( RF_Raid_t *);
229 /*
230 * Pilfered from ccd.c
231 */
232
233 struct raidbuf {
234 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
235 struct buf *rf_obp; /* ptr. to original I/O buf */
236 int rf_flags; /* misc. flags */
237 RF_DiskQueueData_t *req;/* the request that this was part of.. */
238 };
239
240
241 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
242 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
243
244 /* XXX Not sure if the following should be replacing the raidPtrs above,
245 or if it should be used in conjunction with that... */
246
247 struct raid_softc {
248 int sc_flags; /* flags */
249 int sc_cflags; /* configuration flags */
250 size_t sc_size; /* size of the raid device */
251 dev_t sc_dev; /* our device.. */
252 char sc_xname[20]; /* XXX external name */
253 struct disk sc_dkdev; /* generic disk device info */
254 struct pool sc_cbufpool; /* component buffer pool */
255 struct buf buf_queue; /* used for the device queue */
256 };
257 /* sc_flags */
258 #define RAIDF_INITED 0x01 /* unit has been initialized */
259 #define RAIDF_WLABEL 0x02 /* label area is writable */
260 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
261 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
262 #define RAIDF_LOCKED 0x80 /* unit is locked */
263
264 #define raidunit(x) DISKUNIT(x)
265 static int numraid = 0;
266
267 /*
268 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
269 * Be aware that large numbers can allow the driver to consume a lot of
270 * kernel memory, especially on writes, and in degraded mode reads.
271 *
272 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
273 * a single 64K write will typically require 64K for the old data,
274 * 64K for the old parity, and 64K for the new parity, for a total
275 * of 192K (if the parity buffer is not re-used immediately).
276 * Even it if is used immedately, that's still 128K, which when multiplied
277 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
278 *
279 * Now in degraded mode, for example, a 64K read on the above setup may
280 * require data reconstruction, which will require *all* of the 4 remaining
281 * disks to participate -- 4 * 32K/disk == 128K again.
282 */
283
284 #ifndef RAIDOUTSTANDING
285 #define RAIDOUTSTANDING 6
286 #endif
287
288 #define RAIDLABELDEV(dev) \
289 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
290
291 /* declared here, and made public, for the benefit of KVM stuff.. */
292 struct raid_softc *raid_softc;
293
294 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
295 struct disklabel *));
296 static void raidgetdisklabel __P((dev_t));
297 static void raidmakedisklabel __P((struct raid_softc *));
298
299 static int raidlock __P((struct raid_softc *));
300 static void raidunlock __P((struct raid_softc *));
301 int raidlookup __P((char *, struct proc * p, struct vnode **));
302
303 static void rf_markalldirty __P((RF_Raid_t *));
304
305 void
306 raidattach(num)
307 int num;
308 {
309 int raidID;
310 int i, rc;
311
312 #ifdef DEBUG
313 printf("raidattach: Asked for %d units\n", num);
314 #endif
315
316 if (num <= 0) {
317 #ifdef DIAGNOSTIC
318 panic("raidattach: count <= 0");
319 #endif
320 return;
321 }
322 /* This is where all the initialization stuff gets done. */
323
324 /* Make some space for requested number of units... */
325
326 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
327 if (raidPtrs == NULL) {
328 panic("raidPtrs is NULL!!\n");
329 }
330
331 rc = rf_mutex_init(&rf_sparet_wait_mutex);
332 if (rc) {
333 RF_PANIC();
334 }
335
336 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
337 recon_queue = NULL;
338
339 for (i = 0; i < numraid; i++)
340 raidPtrs[i] = NULL;
341 rc = rf_BootRaidframe();
342 if (rc == 0)
343 printf("Kernelized RAIDframe activated\n");
344 else
345 panic("Serious error booting RAID!!\n");
346
347 /* put together some datastructures like the CCD device does.. This
348 * lets us lock the device and what-not when it gets opened. */
349
350 raid_softc = (struct raid_softc *)
351 malloc(num * sizeof(struct raid_softc),
352 M_RAIDFRAME, M_NOWAIT);
353 if (raid_softc == NULL) {
354 printf("WARNING: no memory for RAIDframe driver\n");
355 return;
356 }
357 numraid = num;
358 bzero(raid_softc, num * sizeof(struct raid_softc));
359
360 for (raidID = 0; raidID < num; raidID++) {
361 raid_softc[raidID].buf_queue.b_actf = NULL;
362 raid_softc[raidID].buf_queue.b_actb =
363 &raid_softc[raidID].buf_queue.b_actf;
364 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
365 (RF_Raid_t *));
366 if (raidPtrs[raidID] == NULL) {
367 printf("raidPtrs[%d] is NULL\n", raidID);
368 }
369 }
370 }
371
372
373 int
374 raidsize(dev)
375 dev_t dev;
376 {
377 struct raid_softc *rs;
378 struct disklabel *lp;
379 int part, unit, omask, size;
380
381 unit = raidunit(dev);
382 if (unit >= numraid)
383 return (-1);
384 rs = &raid_softc[unit];
385
386 if ((rs->sc_flags & RAIDF_INITED) == 0)
387 return (-1);
388
389 part = DISKPART(dev);
390 omask = rs->sc_dkdev.dk_openmask & (1 << part);
391 lp = rs->sc_dkdev.dk_label;
392
393 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
394 return (-1);
395
396 if (lp->d_partitions[part].p_fstype != FS_SWAP)
397 size = -1;
398 else
399 size = lp->d_partitions[part].p_size *
400 (lp->d_secsize / DEV_BSIZE);
401
402 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
403 return (-1);
404
405 return (size);
406
407 }
408
409 int
410 raiddump(dev, blkno, va, size)
411 dev_t dev;
412 daddr_t blkno;
413 caddr_t va;
414 size_t size;
415 {
416 /* Not implemented. */
417 return ENXIO;
418 }
419 /* ARGSUSED */
420 int
421 raidopen(dev, flags, fmt, p)
422 dev_t dev;
423 int flags, fmt;
424 struct proc *p;
425 {
426 int unit = raidunit(dev);
427 struct raid_softc *rs;
428 struct disklabel *lp;
429 int part, pmask;
430 int error = 0;
431
432 if (unit >= numraid)
433 return (ENXIO);
434 rs = &raid_softc[unit];
435
436 if ((error = raidlock(rs)) != 0)
437 return (error);
438 lp = rs->sc_dkdev.dk_label;
439
440 part = DISKPART(dev);
441 pmask = (1 << part);
442
443 db1_printf(("Opening raid device number: %d partition: %d\n",
444 unit, part));
445
446
447 if ((rs->sc_flags & RAIDF_INITED) &&
448 (rs->sc_dkdev.dk_openmask == 0))
449 raidgetdisklabel(dev);
450
451 /* make sure that this partition exists */
452
453 if (part != RAW_PART) {
454 db1_printf(("Not a raw partition..\n"));
455 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
456 ((part >= lp->d_npartitions) ||
457 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
458 error = ENXIO;
459 raidunlock(rs);
460 db1_printf(("Bailing out...\n"));
461 return (error);
462 }
463 }
464 /* Prevent this unit from being unconfigured while open. */
465 switch (fmt) {
466 case S_IFCHR:
467 rs->sc_dkdev.dk_copenmask |= pmask;
468 break;
469
470 case S_IFBLK:
471 rs->sc_dkdev.dk_bopenmask |= pmask;
472 break;
473 }
474
475 if ((rs->sc_dkdev.dk_openmask == 0) &&
476 ((rs->sc_flags & RAIDF_INITED) != 0)) {
477 /* First one... mark things as dirty... Note that we *MUST*
478 have done a configure before this. I DO NOT WANT TO BE
479 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
480 THAT THEY BELONG TOGETHER!!!!! */
481 /* XXX should check to see if we're only open for reading
482 here... If so, we needn't do this, but then need some
483 other way of keeping track of what's happened.. */
484
485 rf_markalldirty( raidPtrs[unit] );
486 }
487
488
489 rs->sc_dkdev.dk_openmask =
490 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
491
492 raidunlock(rs);
493
494 return (error);
495
496
497 }
498 /* ARGSUSED */
499 int
500 raidclose(dev, flags, fmt, p)
501 dev_t dev;
502 int flags, fmt;
503 struct proc *p;
504 {
505 int unit = raidunit(dev);
506 struct raid_softc *rs;
507 int error = 0;
508 int part;
509
510 if (unit >= numraid)
511 return (ENXIO);
512 rs = &raid_softc[unit];
513
514 if ((error = raidlock(rs)) != 0)
515 return (error);
516
517 part = DISKPART(dev);
518
519 /* ...that much closer to allowing unconfiguration... */
520 switch (fmt) {
521 case S_IFCHR:
522 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
523 break;
524
525 case S_IFBLK:
526 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
527 break;
528 }
529 rs->sc_dkdev.dk_openmask =
530 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
531
532 if ((rs->sc_dkdev.dk_openmask == 0) &&
533 ((rs->sc_flags & RAIDF_INITED) != 0)) {
534 /* Last one... device is not unconfigured yet.
535 Device shutdown has taken care of setting the
536 clean bits if RAIDF_INITED is not set
537 mark things as clean... */
538 rf_update_component_labels( raidPtrs[unit] );
539 }
540
541 raidunlock(rs);
542 return (0);
543
544 }
545
546 void
547 raidstrategy(bp)
548 register struct buf *bp;
549 {
550 register int s;
551
552 unsigned int raidID = raidunit(bp->b_dev);
553 RF_Raid_t *raidPtr;
554 struct raid_softc *rs = &raid_softc[raidID];
555 struct disklabel *lp;
556 struct buf *dp;
557 int wlabel;
558
559 if ((rs->sc_flags & RAIDF_INITED) ==0) {
560 bp->b_error = ENXIO;
561 bp->b_flags = B_ERROR;
562 bp->b_resid = bp->b_bcount;
563 biodone(bp);
564 return;
565 }
566 if (raidID >= numraid || !raidPtrs[raidID]) {
567 bp->b_error = ENODEV;
568 bp->b_flags |= B_ERROR;
569 bp->b_resid = bp->b_bcount;
570 biodone(bp);
571 return;
572 }
573 raidPtr = raidPtrs[raidID];
574 if (!raidPtr->valid) {
575 bp->b_error = ENODEV;
576 bp->b_flags |= B_ERROR;
577 bp->b_resid = bp->b_bcount;
578 biodone(bp);
579 return;
580 }
581 if (bp->b_bcount == 0) {
582 db1_printf(("b_bcount is zero..\n"));
583 biodone(bp);
584 return;
585 }
586 lp = rs->sc_dkdev.dk_label;
587
588 /*
589 * Do bounds checking and adjust transfer. If there's an
590 * error, the bounds check will flag that for us.
591 */
592
593 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
594 if (DISKPART(bp->b_dev) != RAW_PART)
595 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
596 db1_printf(("Bounds check failed!!:%d %d\n",
597 (int) bp->b_blkno, (int) wlabel));
598 biodone(bp);
599 return;
600 }
601 s = splbio();
602
603 bp->b_resid = 0;
604
605 /* stuff it onto our queue */
606
607 dp = &rs->buf_queue;
608 bp->b_actf = NULL;
609 bp->b_actb = dp->b_actb;
610 *dp->b_actb = bp;
611 dp->b_actb = &bp->b_actf;
612
613 raidstart(raidPtrs[raidID]);
614
615 splx(s);
616 }
617 /* ARGSUSED */
618 int
619 raidread(dev, uio, flags)
620 dev_t dev;
621 struct uio *uio;
622 int flags;
623 {
624 int unit = raidunit(dev);
625 struct raid_softc *rs;
626 int part;
627
628 if (unit >= numraid)
629 return (ENXIO);
630 rs = &raid_softc[unit];
631
632 if ((rs->sc_flags & RAIDF_INITED) == 0)
633 return (ENXIO);
634 part = DISKPART(dev);
635
636 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
637
638 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
639
640 }
641 /* ARGSUSED */
642 int
643 raidwrite(dev, uio, flags)
644 dev_t dev;
645 struct uio *uio;
646 int flags;
647 {
648 int unit = raidunit(dev);
649 struct raid_softc *rs;
650
651 if (unit >= numraid)
652 return (ENXIO);
653 rs = &raid_softc[unit];
654
655 if ((rs->sc_flags & RAIDF_INITED) == 0)
656 return (ENXIO);
657 db1_printf(("raidwrite\n"));
658 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
659
660 }
661
662 int
663 raidioctl(dev, cmd, data, flag, p)
664 dev_t dev;
665 u_long cmd;
666 caddr_t data;
667 int flag;
668 struct proc *p;
669 {
670 int unit = raidunit(dev);
671 int error = 0;
672 int part, pmask;
673 struct raid_softc *rs;
674 RF_Config_t *k_cfg, *u_cfg;
675 u_char *specific_buf;
676 int retcode = 0;
677 int row;
678 int column;
679 int s;
680 struct rf_recon_req *rrcopy, *rr;
681 RF_ComponentLabel_t *component_label;
682 RF_ComponentLabel_t ci_label;
683 RF_ComponentLabel_t **c_label_ptr;
684 RF_SingleComponent_t *sparePtr,*componentPtr;
685 RF_SingleComponent_t hot_spare;
686 RF_SingleComponent_t component;
687
688 if (unit >= numraid)
689 return (ENXIO);
690 rs = &raid_softc[unit];
691
692 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
693 (int) DISKPART(dev), (int) unit, (int) cmd));
694
695 /* Must be open for writes for these commands... */
696 switch (cmd) {
697 case DIOCSDINFO:
698 case DIOCWDINFO:
699 case DIOCWLABEL:
700 if ((flag & FWRITE) == 0)
701 return (EBADF);
702 }
703
704 /* Must be initialized for these... */
705 switch (cmd) {
706 case DIOCGDINFO:
707 case DIOCSDINFO:
708 case DIOCWDINFO:
709 case DIOCGPART:
710 case DIOCWLABEL:
711 case DIOCGDEFLABEL:
712 case RAIDFRAME_SHUTDOWN:
713 case RAIDFRAME_REWRITEPARITY:
714 case RAIDFRAME_GET_INFO:
715 case RAIDFRAME_RESET_ACCTOTALS:
716 case RAIDFRAME_GET_ACCTOTALS:
717 case RAIDFRAME_KEEP_ACCTOTALS:
718 case RAIDFRAME_GET_SIZE:
719 case RAIDFRAME_FAIL_DISK:
720 case RAIDFRAME_COPYBACK:
721 case RAIDFRAME_CHECKRECON:
722 case RAIDFRAME_GET_COMPONENT_LABEL:
723 case RAIDFRAME_SET_COMPONENT_LABEL:
724 case RAIDFRAME_ADD_HOT_SPARE:
725 case RAIDFRAME_REMOVE_HOT_SPARE:
726 case RAIDFRAME_INIT_LABELS:
727 case RAIDFRAME_REBUILD_IN_PLACE:
728 case RAIDFRAME_CHECK_PARITY:
729 if ((rs->sc_flags & RAIDF_INITED) == 0)
730 return (ENXIO);
731 }
732
733 switch (cmd) {
734
735
736 /* configure the system */
737 case RAIDFRAME_CONFIGURE:
738
739 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
740 /* copy-in the configuration information */
741 /* data points to a pointer to the configuration structure */
742 u_cfg = *((RF_Config_t **) data);
743 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
744 if (k_cfg == NULL) {
745 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
746 return (ENOMEM);
747 }
748 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
749 sizeof(RF_Config_t));
750 if (retcode) {
751 RF_Free(k_cfg, sizeof(RF_Config_t));
752 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
753 retcode));
754 return (retcode);
755 }
756 /* allocate a buffer for the layout-specific data, and copy it
757 * in */
758 if (k_cfg->layoutSpecificSize) {
759 if (k_cfg->layoutSpecificSize > 10000) {
760 /* sanity check */
761 RF_Free(k_cfg, sizeof(RF_Config_t));
762 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
763 return (EINVAL);
764 }
765 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
766 (u_char *));
767 if (specific_buf == NULL) {
768 RF_Free(k_cfg, sizeof(RF_Config_t));
769 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
770 return (ENOMEM);
771 }
772 retcode = copyin(k_cfg->layoutSpecific,
773 (caddr_t) specific_buf,
774 k_cfg->layoutSpecificSize);
775 if (retcode) {
776 RF_Free(k_cfg, sizeof(RF_Config_t));
777 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
778 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
779 retcode));
780 return (retcode);
781 }
782 } else
783 specific_buf = NULL;
784 k_cfg->layoutSpecific = specific_buf;
785
786 /* should do some kind of sanity check on the configuration.
787 * Store the sum of all the bytes in the last byte? */
788
789 /* configure the system */
790
791 raidPtrs[unit]->raidid = unit;
792
793 retcode = rf_Configure(raidPtrs[unit], k_cfg);
794
795 /* allow this many simultaneous IO's to this RAID device */
796 raidPtrs[unit]->openings = RAIDOUTSTANDING;
797
798 if (retcode == 0) {
799 retcode = raidinit(dev, raidPtrs[unit], unit);
800 rf_markalldirty( raidPtrs[unit] );
801 }
802 /* free the buffers. No return code here. */
803 if (k_cfg->layoutSpecificSize) {
804 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
805 }
806 RF_Free(k_cfg, sizeof(RF_Config_t));
807
808 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
809 retcode));
810
811 return (retcode);
812
813 /* shutdown the system */
814 case RAIDFRAME_SHUTDOWN:
815
816 if ((error = raidlock(rs)) != 0)
817 return (error);
818
819 /*
820 * If somebody has a partition mounted, we shouldn't
821 * shutdown.
822 */
823
824 part = DISKPART(dev);
825 pmask = (1 << part);
826 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
827 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
828 (rs->sc_dkdev.dk_copenmask & pmask))) {
829 raidunlock(rs);
830 return (EBUSY);
831 }
832
833 if (rf_debugKernelAccess) {
834 printf("call shutdown\n");
835 }
836
837 retcode = rf_Shutdown(raidPtrs[unit]);
838
839 db1_printf(("Done main shutdown\n"));
840
841 pool_destroy(&rs->sc_cbufpool);
842 db1_printf(("Done freeing component buffer freelist\n"));
843
844 /* It's no longer initialized... */
845 rs->sc_flags &= ~RAIDF_INITED;
846
847 /* Detach the disk. */
848 disk_detach(&rs->sc_dkdev);
849
850 raidunlock(rs);
851
852 return (retcode);
853 case RAIDFRAME_GET_COMPONENT_LABEL:
854 c_label_ptr = (RF_ComponentLabel_t **) data;
855 /* need to read the component label for the disk indicated
856 by row,column in component_label
857 XXX need to sanity check these values!!!
858 */
859
860 /* For practice, let's get it directly fromdisk, rather
861 than from the in-core copy */
862 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
863 (RF_ComponentLabel_t *));
864 if (component_label == NULL)
865 return (ENOMEM);
866
867 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
868
869 retcode = copyin( *c_label_ptr, component_label,
870 sizeof(RF_ComponentLabel_t));
871
872 if (retcode) {
873 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
874 return(retcode);
875 }
876
877 row = component_label->row;
878 column = component_label->column;
879
880 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
881 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
882 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
883 return(EINVAL);
884 }
885
886 raidread_component_label(
887 raidPtrs[unit]->Disks[row][column].dev,
888 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
889 component_label );
890
891 retcode = copyout((caddr_t) component_label,
892 (caddr_t) *c_label_ptr,
893 sizeof(RF_ComponentLabel_t));
894 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
895 return (retcode);
896
897 case RAIDFRAME_SET_COMPONENT_LABEL:
898 component_label = (RF_ComponentLabel_t *) data;
899
900 /* XXX check the label for valid stuff... */
901 /* Note that some things *should not* get modified --
902 the user should be re-initing the labels instead of
903 trying to patch things.
904 */
905
906 printf("Got component label:\n");
907 printf("Version: %d\n",component_label->version);
908 printf("Serial Number: %d\n",component_label->serial_number);
909 printf("Mod counter: %d\n",component_label->mod_counter);
910 printf("Row: %d\n", component_label->row);
911 printf("Column: %d\n", component_label->column);
912 printf("Num Rows: %d\n", component_label->num_rows);
913 printf("Num Columns: %d\n", component_label->num_columns);
914 printf("Clean: %d\n", component_label->clean);
915 printf("Status: %d\n", component_label->status);
916
917 row = component_label->row;
918 column = component_label->column;
919
920 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
921 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
922 return(EINVAL);
923 }
924
925 /* XXX this isn't allowed to do anything for now :-) */
926 #if 0
927 raidwrite_component_label(
928 raidPtrs[unit]->Disks[row][column].dev,
929 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
930 component_label );
931 #endif
932 return (0);
933
934 case RAIDFRAME_INIT_LABELS:
935 component_label = (RF_ComponentLabel_t *) data;
936 /*
937 we only want the serial number from
938 the above. We get all the rest of the information
939 from the config that was used to create this RAID
940 set.
941 */
942
943 raidPtrs[unit]->serial_number = component_label->serial_number;
944 /* current version number */
945 ci_label.version = RF_COMPONENT_LABEL_VERSION;
946 ci_label.serial_number = component_label->serial_number;
947 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
948 ci_label.num_rows = raidPtrs[unit]->numRow;
949 ci_label.num_columns = raidPtrs[unit]->numCol;
950 ci_label.clean = RF_RAID_DIRTY; /* not clean */
951 ci_label.status = rf_ds_optimal; /* "It's good!" */
952
953 for(row=0;row<raidPtrs[unit]->numRow;row++) {
954 ci_label.row = row;
955 for(column=0;column<raidPtrs[unit]->numCol;column++) {
956 ci_label.column = column;
957 raidwrite_component_label(
958 raidPtrs[unit]->Disks[row][column].dev,
959 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
960 &ci_label );
961 }
962 }
963
964 return (retcode);
965
966 /* initialize all parity */
967 case RAIDFRAME_REWRITEPARITY:
968
969 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
970 /* Parity for RAID 0 is trivially correct */
971 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
972 return(0);
973 }
974
975 /* borrow the thread of the requesting process */
976
977 s = splbio();
978 retcode = rf_RewriteParity(raidPtrs[unit]);
979 splx(s);
980 /* return I/O Error if the parity rewrite fails */
981
982 if (retcode) {
983 retcode = EIO;
984 } else {
985 /* set the clean bit! If we shutdown correctly,
986 the clean bit on each component label will get
987 set */
988 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
989 }
990 return (retcode);
991
992
993 case RAIDFRAME_ADD_HOT_SPARE:
994 sparePtr = (RF_SingleComponent_t *) data;
995 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
996 printf("Adding spare\n");
997 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
998 return(retcode);
999
1000 case RAIDFRAME_REMOVE_HOT_SPARE:
1001 return(retcode);
1002
1003 case RAIDFRAME_REBUILD_IN_PLACE:
1004
1005 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1006 /* Can't do this on a RAID 0!! */
1007 return(EINVAL);
1008 }
1009
1010 componentPtr = (RF_SingleComponent_t *) data;
1011 memcpy( &component, componentPtr,
1012 sizeof(RF_SingleComponent_t));
1013 row = component.row;
1014 column = component.column;
1015 printf("Rebuild: %d %d\n",row, column);
1016 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
1017 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
1018 return(EINVAL);
1019 }
1020 printf("Attempting a rebuild in place\n");
1021 s = splbio();
1022 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1023 splx(s);
1024 return(retcode);
1025
1026 case RAIDFRAME_GET_INFO:
1027 {
1028 RF_Raid_t *raid = raidPtrs[unit];
1029 RF_DeviceConfig_t *cfg, **ucfgp;
1030 int i, j, d;
1031
1032 if (!raid->valid)
1033 return (ENODEV);
1034 ucfgp = (RF_DeviceConfig_t **) data;
1035 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1036 (RF_DeviceConfig_t *));
1037 if (cfg == NULL)
1038 return (ENOMEM);
1039 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1040 cfg->rows = raid->numRow;
1041 cfg->cols = raid->numCol;
1042 cfg->ndevs = raid->numRow * raid->numCol;
1043 if (cfg->ndevs >= RF_MAX_DISKS) {
1044 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1045 return (ENOMEM);
1046 }
1047 cfg->nspares = raid->numSpare;
1048 if (cfg->nspares >= RF_MAX_DISKS) {
1049 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1050 return (ENOMEM);
1051 }
1052 cfg->maxqdepth = raid->maxQueueDepth;
1053 d = 0;
1054 for (i = 0; i < cfg->rows; i++) {
1055 for (j = 0; j < cfg->cols; j++) {
1056 cfg->devs[d] = raid->Disks[i][j];
1057 d++;
1058 }
1059 }
1060 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1061 cfg->spares[i] = raid->Disks[0][j];
1062 }
1063 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1064 sizeof(RF_DeviceConfig_t));
1065 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1066
1067 return (retcode);
1068 }
1069 break;
1070 case RAIDFRAME_CHECK_PARITY:
1071 *(int *) data = raidPtrs[unit]->parity_good;
1072 return (0);
1073 case RAIDFRAME_RESET_ACCTOTALS:
1074 {
1075 RF_Raid_t *raid = raidPtrs[unit];
1076
1077 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1078 return (0);
1079 }
1080 break;
1081
1082 case RAIDFRAME_GET_ACCTOTALS:
1083 {
1084 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1085 RF_Raid_t *raid = raidPtrs[unit];
1086
1087 *totals = raid->acc_totals;
1088 return (0);
1089 }
1090 break;
1091
1092 case RAIDFRAME_KEEP_ACCTOTALS:
1093 {
1094 RF_Raid_t *raid = raidPtrs[unit];
1095 int *keep = (int *) data;
1096
1097 raid->keep_acc_totals = *keep;
1098 return (0);
1099 }
1100 break;
1101
1102 case RAIDFRAME_GET_SIZE:
1103 *(int *) data = raidPtrs[unit]->totalSectors;
1104 return (0);
1105
1106 /* fail a disk & optionally start reconstruction */
1107 case RAIDFRAME_FAIL_DISK:
1108
1109 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1110 /* Can't do this on a RAID 0!! */
1111 return(EINVAL);
1112 }
1113
1114 rr = (struct rf_recon_req *) data;
1115
1116 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1117 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1118 return (EINVAL);
1119
1120 printf("raid%d: Failing the disk: row: %d col: %d\n",
1121 unit, rr->row, rr->col);
1122
1123 /* make a copy of the recon request so that we don't rely on
1124 * the user's buffer */
1125 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1126 bcopy(rr, rrcopy, sizeof(*rr));
1127 rrcopy->raidPtr = (void *) raidPtrs[unit];
1128
1129 LOCK_RECON_Q_MUTEX();
1130 rrcopy->next = recon_queue;
1131 recon_queue = rrcopy;
1132 wakeup(&recon_queue);
1133 UNLOCK_RECON_Q_MUTEX();
1134
1135 return (0);
1136
1137 /* invoke a copyback operation after recon on whatever disk
1138 * needs it, if any */
1139 case RAIDFRAME_COPYBACK:
1140
1141 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1142 /* This makes no sense on a RAID 0!! */
1143 return(EINVAL);
1144 }
1145
1146 /* borrow the current thread to get this done */
1147
1148 s = splbio();
1149 rf_CopybackReconstructedData(raidPtrs[unit]);
1150 splx(s);
1151 return (0);
1152
1153 /* return the percentage completion of reconstruction */
1154 case RAIDFRAME_CHECKRECON:
1155 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1156 /* This makes no sense on a RAID 0 */
1157 return(EINVAL);
1158 }
1159
1160 row = *(int *) data;
1161 if (row < 0 || row >= raidPtrs[unit]->numRow)
1162 return (EINVAL);
1163 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1164 *(int *) data = 100;
1165 else
1166 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1167 return (0);
1168
1169 /* the sparetable daemon calls this to wait for the kernel to
1170 * need a spare table. this ioctl does not return until a
1171 * spare table is needed. XXX -- calling mpsleep here in the
1172 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1173 * -- I should either compute the spare table in the kernel,
1174 * or have a different -- XXX XXX -- interface (a different
1175 * character device) for delivering the table -- XXX */
1176 #if 0
1177 case RAIDFRAME_SPARET_WAIT:
1178 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1179 while (!rf_sparet_wait_queue)
1180 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1181 waitreq = rf_sparet_wait_queue;
1182 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1183 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1184
1185 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1186
1187 RF_Free(waitreq, sizeof(*waitreq));
1188 return (0);
1189
1190
1191 /* wakes up a process waiting on SPARET_WAIT and puts an error
1192 * code in it that will cause the dameon to exit */
1193 case RAIDFRAME_ABORT_SPARET_WAIT:
1194 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1195 waitreq->fcol = -1;
1196 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1197 waitreq->next = rf_sparet_wait_queue;
1198 rf_sparet_wait_queue = waitreq;
1199 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1200 wakeup(&rf_sparet_wait_queue);
1201 return (0);
1202
1203 /* used by the spare table daemon to deliver a spare table
1204 * into the kernel */
1205 case RAIDFRAME_SEND_SPARET:
1206
1207 /* install the spare table */
1208 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1209
1210 /* respond to the requestor. the return status of the spare
1211 * table installation is passed in the "fcol" field */
1212 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1213 waitreq->fcol = retcode;
1214 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1215 waitreq->next = rf_sparet_resp_queue;
1216 rf_sparet_resp_queue = waitreq;
1217 wakeup(&rf_sparet_resp_queue);
1218 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1219
1220 return (retcode);
1221 #endif
1222
1223 default:
1224 break; /* fall through to the os-specific code below */
1225
1226 }
1227
1228 if (!raidPtrs[unit]->valid)
1229 return (EINVAL);
1230
1231 /*
1232 * Add support for "regular" device ioctls here.
1233 */
1234
1235 switch (cmd) {
1236 case DIOCGDINFO:
1237 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1238 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1239 break;
1240
1241 case DIOCGPART:
1242 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1243 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1244 ((struct partinfo *) data)->part =
1245 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1246 break;
1247
1248 case DIOCWDINFO:
1249 db1_printf(("DIOCWDINFO\n"));
1250 case DIOCSDINFO:
1251 db1_printf(("DIOCSDINFO\n"));
1252 if ((error = raidlock(rs)) != 0)
1253 return (error);
1254
1255 rs->sc_flags |= RAIDF_LABELLING;
1256
1257 error = setdisklabel(rs->sc_dkdev.dk_label,
1258 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1259 if (error == 0) {
1260 if (cmd == DIOCWDINFO)
1261 error = writedisklabel(RAIDLABELDEV(dev),
1262 raidstrategy, rs->sc_dkdev.dk_label,
1263 rs->sc_dkdev.dk_cpulabel);
1264 }
1265 rs->sc_flags &= ~RAIDF_LABELLING;
1266
1267 raidunlock(rs);
1268
1269 if (error)
1270 return (error);
1271 break;
1272
1273 case DIOCWLABEL:
1274 db1_printf(("DIOCWLABEL\n"));
1275 if (*(int *) data != 0)
1276 rs->sc_flags |= RAIDF_WLABEL;
1277 else
1278 rs->sc_flags &= ~RAIDF_WLABEL;
1279 break;
1280
1281 case DIOCGDEFLABEL:
1282 db1_printf(("DIOCGDEFLABEL\n"));
1283 raidgetdefaultlabel(raidPtrs[unit], rs,
1284 (struct disklabel *) data);
1285 break;
1286
1287 default:
1288 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1289 }
1290 return (retcode);
1291
1292 }
1293
1294
/* raidinit -- complete the rest of the initialization for the
   RAIDframe device: set up the raidbuf pool, mark the unit INITED,
   attach the disk(9) structure, and record the unit's size/dev. */


static int
raidinit(dev, raidPtr, unit)
	dev_t dev;
	RF_Raid_t *raidPtr;
	int unit;
{
	int retcode;
	/* int ix; */
	/* struct raidbuf *raidbp; */
	struct raid_softc *rs;

	retcode = 0;

	rs = &raid_softc[unit];
	/* pool of shadow buffers (struct raidbuf) for component I/O */
	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
	    0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */

	rs->sc_dkdev.dk_name = rs->sc_xname;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe. */

	rs->sc_size = raidPtr->totalSectors;
	rs->sc_dev = dev;

	/* always 0 at present; kept for future error propagation */
	return (retcode);
}
1338
1339 /*
1340 * This kernel thread never exits. It is created once, and persists
1341 * until the system reboots.
1342 */
1343
1344 void
1345 rf_ReconKernelThread()
1346 {
1347 struct rf_recon_req *req;
1348 int s;
1349
1350 /* XXX not sure what spl() level we should be at here... probably
1351 * splbio() */
1352 s = splbio();
1353
1354 while (1) {
1355 /* grab the next reconstruction request from the queue */
1356 LOCK_RECON_Q_MUTEX();
1357 while (!recon_queue) {
1358 UNLOCK_RECON_Q_MUTEX();
1359 tsleep(&recon_queue, PRIBIO,
1360 "raidframe recon", 0);
1361 LOCK_RECON_Q_MUTEX();
1362 }
1363 req = recon_queue;
1364 recon_queue = recon_queue->next;
1365 UNLOCK_RECON_Q_MUTEX();
1366
1367 /*
1368 * If flags specifies that we should start recon, this call
1369 * will not return until reconstruction completes, fails,
1370 * or is aborted.
1371 */
1372 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
1373 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
1374
1375 RF_Free(req, sizeof(*req));
1376 }
1377 }
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 *
 * Returns the status code the daemon placed in the response's fcol
 * field.  The caller's `req' is handed off to the daemon; the response
 * entry we dequeue (and free) is a different allocation.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	/* post the request and wake the daemon blocked in SPARET_WAIT */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): tsleep() does not drop rf_sparet_wait_mutex the
	 * way the mpsleep() referred to above did -- confirm that holding
	 * this mutex across the sleep cannot deadlock with the daemon. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* dequeue the daemon's response entry */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 *
 * Drains the per-unit buf queue (rs->buf_queue) while the array still
 * has openings, converting each buf into an asynchronous rf_DoAccess()
 * call.  Out-of-range or misaligned requests are completed immediately
 * with an error via biodone().
 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	struct buf *dp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Check to see if we're at the limit... */
	RF_LOCK_MUTEX(raidPtr->mutex);
	/* loop invariant: the mutex is held at the top of each iteration */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		dp = &rs->buf_queue;
		bp = dp->b_actf;
		if (bp == NULL) {
			/* nothing more to do */
			return;
		}

		/* update structures */
		/* unlink bp from the doubly-linked b_actf/b_actb queue */
		dp = bp->b_actf;
		if (dp != NULL) {
			dp->b_actb = bp->b_actb;
		} else {
			/* bp was the tail; queue head's back-pointer moves */
			rs->buf_queue.b_actb = bp->b_actb;
		}
		*bp->b_actb = dp;

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a possible trailing partial sector */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/* reject requests that run off the end of the array; the
		 * `sum < x' comparisons also catch arithmetic wrap-around */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that aren't a multiple of the sector size */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* consume one opening for this in-flight access */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
		    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
		    do_async, raid_addr, num_blocks,
		    bp->b_un.b_addr, bp, NULL, NULL,
		    RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);


		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1537
1538
1539
1540
/* invoke an I/O from kernel mode. Disk queue should be locked upon entry.
 * Wraps the request's buf in a shadow raidbuf (so that KernelWakeupFunc
 * gets its context back) and fires it at the component via VOP_STRATEGY.
 * Always returns 0. */

int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	/* clear any stale error state left over from a previous use */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* shadow buffer from the per-unit pool; freed in KernelWakeupFunc */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* complete immediately: no real I/O is issued for a NOP */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* aim the shadow buf at the component's vnode/device */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must bump the vnode's output counter */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.  Runs at biodone() time for the shadow raidbuf created
   in rf_DispatchKernelIO: propagates error/resid state to the original
   buf, updates trace accounting, marks the component failed on error,
   returns the raidbuf to its pool, and notifies RAIDframe.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* the shadow buf is the first member of struct raidbuf, so this
	 * cast recovers our per-request context */
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate the component I/O error to the original buf */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* return the shadow buf to the per-unit pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	/* NOTE(review): disk_unbusy() runs only when b_resid == 0, and
	 * (b_bcount - b_resid) is then simply b_bcount; partial transfers
	 * never unbusy the disk -- confirm this is intended. */
	if (bp->b_resid == 0) {
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	}

	/* hand completion (and error status) back to RAIDframe */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);

	splx(s);
}
1740
1741
1742
1743 /*
1744 * initialize a buf structure for doing an I/O in the kernel.
1745 */
1746 static void
1747 InitBP(
1748 struct buf * bp,
1749 struct vnode * b_vp,
1750 unsigned rw_flag,
1751 dev_t dev,
1752 RF_SectorNum_t startSect,
1753 RF_SectorCount_t numSect,
1754 caddr_t buf,
1755 void (*cbFunc) (struct buf *),
1756 void *cbArg,
1757 int logBytesPerSector,
1758 struct proc * b_proc)
1759 {
1760 /* bp->b_flags = B_PHYS | rw_flag; */
1761 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1762 bp->b_bcount = numSect << logBytesPerSector;
1763 bp->b_bufsize = bp->b_bcount;
1764 bp->b_error = 0;
1765 bp->b_dev = dev;
1766 bp->b_un.b_addr = buf;
1767 bp->b_blkno = startSect;
1768 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1769 if (bp->b_bcount == 0) {
1770 panic("bp->b_bcount is zero in InitBP!!\n");
1771 }
1772 bp->b_proc = b_proc;
1773 bp->b_iodone = cbFunc;
1774 bp->b_vp = b_vp;
1775
1776 }
1777
1778 static void
1779 raidgetdefaultlabel(raidPtr, rs, lp)
1780 RF_Raid_t *raidPtr;
1781 struct raid_softc *rs;
1782 struct disklabel *lp;
1783 {
1784 db1_printf(("Building a default label...\n"));
1785 bzero(lp, sizeof(*lp));
1786
1787 /* fabricate a label... */
1788 lp->d_secperunit = raidPtr->totalSectors;
1789 lp->d_secsize = raidPtr->bytesPerSector;
1790 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1791 lp->d_ntracks = 1;
1792 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1793 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1794
1795 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1796 lp->d_type = DTYPE_RAID;
1797 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1798 lp->d_rpm = 3600;
1799 lp->d_interleave = 1;
1800 lp->d_flags = 0;
1801
1802 lp->d_partitions[RAW_PART].p_offset = 0;
1803 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1804 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1805 lp->d_npartitions = RAW_PART + 1;
1806
1807 lp->d_magic = DISKMAGIC;
1808 lp->d_magic2 = DISKMAGIC;
1809 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1810
1811 }
1812 /*
1813 * Read the disklabel from the raid device. If one is not present, fake one
1814 * up.
1815 */
1816 static void
1817 raidgetdisklabel(dev)
1818 dev_t dev;
1819 {
1820 int unit = raidunit(dev);
1821 struct raid_softc *rs = &raid_softc[unit];
1822 char *errstring;
1823 struct disklabel *lp = rs->sc_dkdev.dk_label;
1824 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1825 RF_Raid_t *raidPtr;
1826
1827 db1_printf(("Getting the disklabel...\n"));
1828
1829 bzero(clp, sizeof(*clp));
1830
1831 raidPtr = raidPtrs[unit];
1832
1833 raidgetdefaultlabel(raidPtr, rs, lp);
1834
1835 /*
1836 * Call the generic disklabel extraction routine.
1837 */
1838 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1839 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1840 if (errstring)
1841 raidmakedisklabel(rs);
1842 else {
1843 int i;
1844 struct partition *pp;
1845
1846 /*
1847 * Sanity check whether the found disklabel is valid.
1848 *
1849 * This is necessary since total size of the raid device
1850 * may vary when an interleave is changed even though exactly
1851 * same componets are used, and old disklabel may used
1852 * if that is found.
1853 */
1854 if (lp->d_secperunit != rs->sc_size)
1855 printf("WARNING: %s: "
1856 "total sector size in disklabel (%d) != "
1857 "the size of raid (%ld)\n", rs->sc_xname,
1858 lp->d_secperunit, (long) rs->sc_size);
1859 for (i = 0; i < lp->d_npartitions; i++) {
1860 pp = &lp->d_partitions[i];
1861 if (pp->p_offset + pp->p_size > rs->sc_size)
1862 printf("WARNING: %s: end of partition `%c' "
1863 "exceeds the size of raid (%ld)\n",
1864 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1865 }
1866 }
1867
1868 }
1869 /*
1870 * Take care of things one might want to take care of in the event
1871 * that a disklabel isn't present.
1872 */
1873 static void
1874 raidmakedisklabel(rs)
1875 struct raid_softc *rs;
1876 {
1877 struct disklabel *lp = rs->sc_dkdev.dk_label;
1878 db1_printf(("Making a label..\n"));
1879
1880 /*
1881 * For historical reasons, if there's no disklabel present
1882 * the raw partition must be marked FS_BSDFFS.
1883 */
1884
1885 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1886
1887 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1888
1889 lp->d_checksum = dkcksum(lp);
1890 }
1891 /*
1892 * Lookup the provided name in the filesystem. If the file exists,
1893 * is a valid block device, and isn't being used by anyone else,
1894 * set *vpp to the file's vnode.
1895 * You'll find the original of this in ccd.c
1896 */
1897 int
1898 raidlookup(path, p, vpp)
1899 char *path;
1900 struct proc *p;
1901 struct vnode **vpp; /* result */
1902 {
1903 struct nameidata nd;
1904 struct vnode *vp;
1905 struct vattr va;
1906 int error;
1907
1908 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1909 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1910 #ifdef DEBUG
1911 printf("RAIDframe: vn_open returned %d\n", error);
1912 #endif
1913 return (error);
1914 }
1915 vp = nd.ni_vp;
1916 if (vp->v_usecount > 1) {
1917 VOP_UNLOCK(vp, 0);
1918 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1919 return (EBUSY);
1920 }
1921 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1922 VOP_UNLOCK(vp, 0);
1923 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1924 return (error);
1925 }
1926 /* XXX: eventually we should handle VREG, too. */
1927 if (va.va_type != VBLK) {
1928 VOP_UNLOCK(vp, 0);
1929 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1930 return (ENOTBLK);
1931 }
1932 VOP_UNLOCK(vp, 0);
1933 *vpp = vp;
1934 return (0);
1935 }
1936 /*
1937 * Wait interruptibly for an exclusive lock.
1938 *
1939 * XXX
1940 * Several drivers do this; it should be abstracted and made MP-safe.
1941 * (Hmm... where have we seen this warning before :-> GO )
1942 */
1943 static int
1944 raidlock(rs)
1945 struct raid_softc *rs;
1946 {
1947 int error;
1948
1949 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1950 rs->sc_flags |= RAIDF_WANTED;
1951 if ((error =
1952 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1953 return (error);
1954 }
1955 rs->sc_flags |= RAIDF_LOCKED;
1956 return (0);
1957 }
1958 /*
1959 * Unlock and wake up any waiters.
1960 */
1961 static void
1962 raidunlock(rs)
1963 struct raid_softc *rs;
1964 {
1965
1966 rs->sc_flags &= ~RAIDF_LOCKED;
1967 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
1968 rs->sc_flags &= ~RAIDF_WANTED;
1969 wakeup(rs);
1970 }
1971 }
1972
1973
1974 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
1975 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
1976
1977 int
1978 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
1979 {
1980 RF_ComponentLabel_t component_label;
1981 raidread_component_label(dev, b_vp, &component_label);
1982 component_label.mod_counter = mod_counter;
1983 component_label.clean = RF_RAID_CLEAN;
1984 raidwrite_component_label(dev, b_vp, &component_label);
1985 return(0);
1986 }
1987
1988
1989 int
1990 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
1991 {
1992 RF_ComponentLabel_t component_label;
1993 raidread_component_label(dev, b_vp, &component_label);
1994 component_label.mod_counter = mod_counter;
1995 component_label.clean = RF_RAID_DIRTY;
1996 raidwrite_component_label(dev, b_vp, &component_label);
1997 return(0);
1998 }
1999
/* ARGSUSED */
/*
 * Read the RAIDframe component label from `dev' into *component_label.
 * The label lives RF_COMPONENT_INFO_OFFSET bytes into the component,
 * within the region RAIDframe reserves (rf_protected_sectors).
 * Returns 0 on success or the error from biowait().
 */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* issue the read synchronously via the component's strategy */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* mark the buffer stale so it isn't re-used from the cache */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
/* ARGSUSED */
/*
 * Write *component_label to the reserved component-label area on `dev'.
 * The remainder of the RF_COMPONENT_INFO_SIZE block is zero-filled.
 * Returns 0 on success or the error from biowait().
 */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero-fill, then copy the label into the front of the block */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* issue the write synchronously via the component's strategy */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* mark the buffer stale so it isn't re-used from the cache */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2084
/*
 * rf_markalldirty: bump the set's mod_counter and mark the component
 * label of every non-failed component dirty.  Done when the set goes
 * into use, so that an unclean shutdown is detectable at next boot.
 * (The #if 0 region below is unfinished spare-handling code; note it
 * references locals -- sparecol, i, j, srow, scol -- not declared in
 * this function, so it cannot simply be enabled as-is.)
 */
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
#if 0
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
2179
2180
2181 void
2182 rf_update_component_labels( raidPtr )
2183 RF_Raid_t *raidPtr;
2184 {
2185 RF_ComponentLabel_t c_label;
2186 int sparecol;
2187 int r,c;
2188 int i,j;
2189 int srow, scol;
2190
2191 srow = -1;
2192 scol = -1;
2193
2194 /* XXX should do extra checks to make sure things really are clean,
2195 rather than blindly setting the clean bit... */
2196
2197 raidPtr->mod_counter++;
2198
2199 for (r = 0; r < raidPtr->numRow; r++) {
2200 for (c = 0; c < raidPtr->numCol; c++) {
2201 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2202 raidread_component_label(
2203 raidPtr->Disks[r][c].dev,
2204 raidPtr->raid_cinfo[r][c].ci_vp,
2205 &c_label);
2206 /* make sure status is noted */
2207 c_label.status = rf_ds_optimal;
2208 raidwrite_component_label(
2209 raidPtr->Disks[r][c].dev,
2210 raidPtr->raid_cinfo[r][c].ci_vp,
2211 &c_label);
2212 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2213 raidmarkclean(
2214 raidPtr->Disks[r][c].dev,
2215 raidPtr->raid_cinfo[r][c].ci_vp,
2216 raidPtr->mod_counter);
2217 }
2218 }
2219 /* else we don't touch it.. */
2220 #if 0
2221 else if (raidPtr->Disks[r][c].status !=
2222 rf_ds_failed) {
2223 raidread_component_label(
2224 raidPtr->Disks[r][c].dev,
2225 raidPtr->raid_cinfo[r][c].ci_vp,
2226 &c_label);
2227 /* make sure status is noted */
2228 c_label.status =
2229 raidPtr->Disks[r][c].status;
2230 raidwrite_component_label(
2231 raidPtr->Disks[r][c].dev,
2232 raidPtr->raid_cinfo[r][c].ci_vp,
2233 &c_label);
2234 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2235 raidmarkclean(
2236 raidPtr->Disks[r][c].dev,
2237 raidPtr->raid_cinfo[r][c].ci_vp,
2238 raidPtr->mod_counter);
2239 }
2240 }
2241 #endif
2242 }
2243 }
2244
2245 for( c = 0; c < raidPtr->numSpare ; c++) {
2246 sparecol = raidPtr->numCol + c;
2247 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2248 /*
2249
2250 we claim this disk is "optimal" if it's
2251 rf_ds_used_spare, as that means it should be
2252 directly substitutable for the disk it replaced.
2253 We note that too...
2254
2255 */
2256
2257 for(i=0;i<raidPtr->numRow;i++) {
2258 for(j=0;j<raidPtr->numCol;j++) {
2259 if ((raidPtr->Disks[i][j].spareRow ==
2260 0) &&
2261 (raidPtr->Disks[i][j].spareCol ==
2262 sparecol)) {
2263 srow = i;
2264 scol = j;
2265 break;
2266 }
2267 }
2268 }
2269
2270 raidread_component_label(
2271 raidPtr->Disks[0][sparecol].dev,
2272 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2273 &c_label);
2274 /* make sure status is noted */
2275 c_label.version = RF_COMPONENT_LABEL_VERSION;
2276 c_label.mod_counter = raidPtr->mod_counter;
2277 c_label.serial_number = raidPtr->serial_number;
2278 c_label.row = srow;
2279 c_label.column = scol;
2280 c_label.num_rows = raidPtr->numRow;
2281 c_label.num_columns = raidPtr->numCol;
2282 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2283 c_label.status = rf_ds_optimal;
2284 raidwrite_component_label(
2285 raidPtr->Disks[0][sparecol].dev,
2286 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2287 &c_label);
2288 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2289 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2290 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2291 raidPtr->mod_counter);
2292 }
2293 }
2294 }
2295 /* printf("Component labels updated\n"); */
2296 }
2297