rf_netbsdkintf.c revision 1.35 1 /* $NetBSD: rf_netbsdkintf.c,v 1.35 1999/12/14 15:27:00 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
154 int rf_kdebug_level = 0;
155
156 #ifdef DEBUG
157 #define db0_printf(a) printf a
158 #define db_printf(a) if (rf_kdebug_level > 0) printf a
159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
160 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
161 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
162 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
163 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
164 #else /* DEBUG */
165 #define db0_printf(a) printf a
166 #define db1_printf(a) { }
167 #define db2_printf(a) { }
168 #define db3_printf(a) { }
169 #define db4_printf(a) { }
170 #define db5_printf(a) { }
171 #endif /* DEBUG */
172
173 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
174
175 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
176
177 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
178 * spare table */
179 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
180 * installation process */
181
182 static struct rf_recon_req *recon_queue = NULL; /* used to communicate
183 * reconstruction
184 * requests */
185
186
187 decl_simple_lock_data(, recon_queue_mutex)
188 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
189 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
190
191 /* prototypes */
192 static void KernelWakeupFunc(struct buf * bp);
193 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
194 dev_t dev, RF_SectorNum_t startSect,
195 RF_SectorCount_t numSect, caddr_t buf,
196 void (*cbFunc) (struct buf *), void *cbArg,
197 int logBytesPerSector, struct proc * b_proc);
198
199 #define Dprintf0(s) if (rf_queueDebug) \
200 rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
201 #define Dprintf1(s,a) if (rf_queueDebug) \
202 rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
203 #define Dprintf2(s,a,b) if (rf_queueDebug) \
204 rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
205 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
206 rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
207
208 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
209 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
210
211 void raidattach __P((int));
212 int raidsize __P((dev_t));
213
214 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
215 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
216 static int raidinit __P((dev_t, RF_Raid_t *, int));
217
218 int raidopen __P((dev_t, int, int, struct proc *));
219 int raidclose __P((dev_t, int, int, struct proc *));
220 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
221 int raidwrite __P((dev_t, struct uio *, int));
222 int raidread __P((dev_t, struct uio *, int));
223 void raidstrategy __P((struct buf *));
224 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
225
226 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
227 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
228 void rf_update_component_labels( RF_Raid_t *);
229 /*
230 * Pilfered from ccd.c
231 */
232
/*
 * Per-component I/O wrapper (pilfered from ccd.c): pairs the buf that
 * goes to an underlying component with the original I/O buf and the
 * RAIDframe request it services.  Instances are allocated from the
 * unit's sc_cbufpool via RAIDGETBUF()/RAIDPUTBUF().
 */
233 struct raidbuf {
234 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
235 struct buf *rf_obp; /* ptr. to original I/O buf */
236 int rf_flags; /* misc. flags */
237 RF_DiskQueueData_t *req;/* the request that this was part of.. */
238 };
239
240
241 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
242 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
243
244 /* XXX Not sure if the following should be replacing the raidPtrs above,
245 or if it should be used in conjunction with that... */
246
/*
 * Per-unit software state for a RAID device.  One entry per unit
 * lives in the raid_softc[] array allocated in raidattach(); the
 * sc_flags bits (RAIDF_*) are defined below.  buf_queue is the
 * per-unit pending-I/O queue fed by raidstrategy().
 */
247 struct raid_softc {
248 int sc_flags; /* flags */
249 int sc_cflags; /* configuration flags */
250 size_t sc_size; /* size of the raid device */
251 dev_t sc_dev; /* our device.. */
252 char sc_xname[20]; /* XXX external name */
253 struct disk sc_dkdev; /* generic disk device info */
254 struct pool sc_cbufpool; /* component buffer pool */
255 struct buf buf_queue; /* used for the device queue */
256 };
257 /* sc_flags */
258 #define RAIDF_INITED 0x01 /* unit has been initialized */
259 #define RAIDF_WLABEL 0x02 /* label area is writable */
260 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
261 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
262 #define RAIDF_LOCKED 0x80 /* unit is locked */
263
264 #define raidunit(x) DISKUNIT(x)
265 static int numraid = 0;
266
267 /*
268 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
269 * Be aware that large numbers can allow the driver to consume a lot of
270 * kernel memory, especially on writes, and in degraded mode reads.
271 *
272 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
273 * a single 64K write will typically require 64K for the old data,
274 * 64K for the old parity, and 64K for the new parity, for a total
275 * of 192K (if the parity buffer is not re-used immediately).
276 * Even if it is used immediately, that's still 128K, which when multiplied
277 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
278 *
279 * Now in degraded mode, for example, a 64K read on the above setup may
280 * require data reconstruction, which will require *all* of the 4 remaining
281 * disks to participate -- 4 * 32K/disk == 128K again.
282 */
283
284 #ifndef RAIDOUTSTANDING
285 #define RAIDOUTSTANDING 6
286 #endif
287
288 #define RAIDLABELDEV(dev) \
289 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
290
291 /* declared here, and made public, for the benefit of KVM stuff.. */
292 struct raid_softc *raid_softc;
293
294 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
295 struct disklabel *));
296 static void raidgetdisklabel __P((dev_t));
297 static void raidmakedisklabel __P((struct raid_softc *));
298
299 static int raidlock __P((struct raid_softc *));
300 static void raidunlock __P((struct raid_softc *));
301 int raidlookup __P((char *, struct proc * p, struct vnode **));
302
303 static void rf_markalldirty __P((RF_Raid_t *));
304
305 void
306 raidattach(num)
307 int num;
308 {
309 int raidID;
310 int i, rc;
311
312 #ifdef DEBUG
313 printf("raidattach: Asked for %d units\n", num);
314 #endif
315
316 if (num <= 0) {
317 #ifdef DIAGNOSTIC
318 panic("raidattach: count <= 0");
319 #endif
320 return;
321 }
322 /* This is where all the initialization stuff gets done. */
323
324 /* Make some space for requested number of units... */
325
326 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
327 if (raidPtrs == NULL) {
328 panic("raidPtrs is NULL!!\n");
329 }
330
331 rc = rf_mutex_init(&rf_sparet_wait_mutex);
332 if (rc) {
333 RF_PANIC();
334 }
335
336 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
337 recon_queue = NULL;
338
339 for (i = 0; i < numraid; i++)
340 raidPtrs[i] = NULL;
341 rc = rf_BootRaidframe();
342 if (rc == 0)
343 printf("Kernelized RAIDframe activated\n");
344 else
345 panic("Serious error booting RAID!!\n");
346
347 /* put together some datastructures like the CCD device does.. This
348 * lets us lock the device and what-not when it gets opened. */
349
350 raid_softc = (struct raid_softc *)
351 malloc(num * sizeof(struct raid_softc),
352 M_RAIDFRAME, M_NOWAIT);
353 if (raid_softc == NULL) {
354 printf("WARNING: no memory for RAIDframe driver\n");
355 return;
356 }
357 numraid = num;
358 bzero(raid_softc, num * sizeof(struct raid_softc));
359
360 for (raidID = 0; raidID < num; raidID++) {
361 raid_softc[raidID].buf_queue.b_actf = NULL;
362 raid_softc[raidID].buf_queue.b_actb =
363 &raid_softc[raidID].buf_queue.b_actf;
364 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
365 (RF_Raid_t *));
366 if (raidPtrs[raidID] == NULL) {
367 printf("raidPtrs[%d] is NULL\n", raidID);
368 }
369 }
370 }
371
372
373 int
374 raidsize(dev)
375 dev_t dev;
376 {
377 struct raid_softc *rs;
378 struct disklabel *lp;
379 int part, unit, omask, size;
380
381 unit = raidunit(dev);
382 if (unit >= numraid)
383 return (-1);
384 rs = &raid_softc[unit];
385
386 if ((rs->sc_flags & RAIDF_INITED) == 0)
387 return (-1);
388
389 part = DISKPART(dev);
390 omask = rs->sc_dkdev.dk_openmask & (1 << part);
391 lp = rs->sc_dkdev.dk_label;
392
393 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
394 return (-1);
395
396 if (lp->d_partitions[part].p_fstype != FS_SWAP)
397 size = -1;
398 else
399 size = lp->d_partitions[part].p_size *
400 (lp->d_secsize / DEV_BSIZE);
401
402 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
403 return (-1);
404
405 return (size);
406
407 }
408
409 int
410 raiddump(dev, blkno, va, size)
411 dev_t dev;
412 daddr_t blkno;
413 caddr_t va;
414 size_t size;
415 {
416 /* Not implemented. */
417 return ENXIO;
418 }
419 /* ARGSUSED */
420 int
421 raidopen(dev, flags, fmt, p)
422 dev_t dev;
423 int flags, fmt;
424 struct proc *p;
425 {
426 int unit = raidunit(dev);
427 struct raid_softc *rs;
428 struct disklabel *lp;
429 int part, pmask;
430 int error = 0;
431
432 if (unit >= numraid)
433 return (ENXIO);
434 rs = &raid_softc[unit];
435
436 if ((error = raidlock(rs)) != 0)
437 return (error);
438 lp = rs->sc_dkdev.dk_label;
439
440 part = DISKPART(dev);
441 pmask = (1 << part);
442
443 db1_printf(("Opening raid device number: %d partition: %d\n",
444 unit, part));
445
446
447 if ((rs->sc_flags & RAIDF_INITED) &&
448 (rs->sc_dkdev.dk_openmask == 0))
449 raidgetdisklabel(dev);
450
451 /* make sure that this partition exists */
452
453 if (part != RAW_PART) {
454 db1_printf(("Not a raw partition..\n"));
455 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
456 ((part >= lp->d_npartitions) ||
457 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
458 error = ENXIO;
459 raidunlock(rs);
460 db1_printf(("Bailing out...\n"));
461 return (error);
462 }
463 }
464 /* Prevent this unit from being unconfigured while open. */
465 switch (fmt) {
466 case S_IFCHR:
467 rs->sc_dkdev.dk_copenmask |= pmask;
468 break;
469
470 case S_IFBLK:
471 rs->sc_dkdev.dk_bopenmask |= pmask;
472 break;
473 }
474
475 if ((rs->sc_dkdev.dk_openmask == 0) &&
476 ((rs->sc_flags & RAIDF_INITED) != 0)) {
477 /* First one... mark things as dirty... Note that we *MUST*
478 have done a configure before this. I DO NOT WANT TO BE
479 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
480 THAT THEY BELONG TOGETHER!!!!! */
481 /* XXX should check to see if we're only open for reading
482 here... If so, we needn't do this, but then need some
483 other way of keeping track of what's happened.. */
484
485 rf_markalldirty( raidPtrs[unit] );
486 }
487
488
489 rs->sc_dkdev.dk_openmask =
490 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
491
492 raidunlock(rs);
493
494 return (error);
495
496
497 }
498 /* ARGSUSED */
499 int
500 raidclose(dev, flags, fmt, p)
501 dev_t dev;
502 int flags, fmt;
503 struct proc *p;
504 {
505 int unit = raidunit(dev);
506 struct raid_softc *rs;
507 int error = 0;
508 int part;
509
510 if (unit >= numraid)
511 return (ENXIO);
512 rs = &raid_softc[unit];
513
514 if ((error = raidlock(rs)) != 0)
515 return (error);
516
517 part = DISKPART(dev);
518
519 /* ...that much closer to allowing unconfiguration... */
520 switch (fmt) {
521 case S_IFCHR:
522 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
523 break;
524
525 case S_IFBLK:
526 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
527 break;
528 }
529 rs->sc_dkdev.dk_openmask =
530 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
531
532 if ((rs->sc_dkdev.dk_openmask == 0) &&
533 ((rs->sc_flags & RAIDF_INITED) != 0)) {
534 /* Last one... device is not unconfigured yet.
535 Device shutdown has taken care of setting the
536 clean bits if RAIDF_INITED is not set
537 mark things as clean... */
538 rf_update_component_labels( raidPtrs[unit] );
539 }
540
541 raidunlock(rs);
542 return (0);
543
544 }
545
546 void
547 raidstrategy(bp)
548 register struct buf *bp;
549 {
550 register int s;
551
552 unsigned int raidID = raidunit(bp->b_dev);
553 RF_Raid_t *raidPtr;
554 struct raid_softc *rs = &raid_softc[raidID];
555 struct disklabel *lp;
556 struct buf *dp;
557 int wlabel;
558
559 #if 0
560 db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
561 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
562 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
563 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
564 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
565
566 if (bp->b_flags & B_READ)
567 db1_printf(("READ\n"));
568 else
569 db1_printf(("WRITE\n"));
570 #endif
571 if ((rs->sc_flags & RAIDF_INITED) ==0) {
572 bp->b_error = ENXIO;
573 bp->b_flags = B_ERROR;
574 bp->b_resid = bp->b_bcount;
575 biodone(bp);
576 return;
577 }
578 if (raidID >= numraid || !raidPtrs[raidID]) {
579 bp->b_error = ENODEV;
580 bp->b_flags |= B_ERROR;
581 bp->b_resid = bp->b_bcount;
582 biodone(bp);
583 return;
584 }
585 raidPtr = raidPtrs[raidID];
586 if (!raidPtr->valid) {
587 bp->b_error = ENODEV;
588 bp->b_flags |= B_ERROR;
589 bp->b_resid = bp->b_bcount;
590 biodone(bp);
591 return;
592 }
593 if (bp->b_bcount == 0) {
594 db1_printf(("b_bcount is zero..\n"));
595 biodone(bp);
596 return;
597 }
598 lp = rs->sc_dkdev.dk_label;
599
600 /*
601 * Do bounds checking and adjust transfer. If there's an
602 * error, the bounds check will flag that for us.
603 */
604
605 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
606 if (DISKPART(bp->b_dev) != RAW_PART)
607 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
608 db1_printf(("Bounds check failed!!:%d %d\n",
609 (int) bp->b_blkno, (int) wlabel));
610 biodone(bp);
611 return;
612 }
613 s = splbio();
614
615 bp->b_resid = 0;
616
617 /* stuff it onto our queue */
618
619 dp = &rs->buf_queue;
620 bp->b_actf = NULL;
621 bp->b_actb = dp->b_actb;
622 *dp->b_actb = bp;
623 dp->b_actb = &bp->b_actf;
624
625 raidstart(raidPtrs[raidID]);
626
627 splx(s);
628 }
629 /* ARGSUSED */
630 int
631 raidread(dev, uio, flags)
632 dev_t dev;
633 struct uio *uio;
634 int flags;
635 {
636 int unit = raidunit(dev);
637 struct raid_softc *rs;
638 int part;
639
640 if (unit >= numraid)
641 return (ENXIO);
642 rs = &raid_softc[unit];
643
644 if ((rs->sc_flags & RAIDF_INITED) == 0)
645 return (ENXIO);
646 part = DISKPART(dev);
647
648 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
649
650 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
651
652 }
653 /* ARGSUSED */
654 int
655 raidwrite(dev, uio, flags)
656 dev_t dev;
657 struct uio *uio;
658 int flags;
659 {
660 int unit = raidunit(dev);
661 struct raid_softc *rs;
662
663 if (unit >= numraid)
664 return (ENXIO);
665 rs = &raid_softc[unit];
666
667 if ((rs->sc_flags & RAIDF_INITED) == 0)
668 return (ENXIO);
669 db1_printf(("raidwrite\n"));
670 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
671
672 }
673
674 int
675 raidioctl(dev, cmd, data, flag, p)
676 dev_t dev;
677 u_long cmd;
678 caddr_t data;
679 int flag;
680 struct proc *p;
681 {
682 int unit = raidunit(dev);
683 int error = 0;
684 int part, pmask;
685 struct raid_softc *rs;
686 RF_Config_t *k_cfg, *u_cfg;
687 u_char *specific_buf;
688 int retcode = 0;
689 int row;
690 int column;
691 int s;
692 struct rf_recon_req *rrcopy, *rr;
693 RF_ComponentLabel_t *component_label;
694 RF_ComponentLabel_t ci_label;
695 RF_ComponentLabel_t **c_label_ptr;
696 RF_SingleComponent_t *sparePtr,*componentPtr;
697 RF_SingleComponent_t hot_spare;
698 RF_SingleComponent_t component;
699
700 if (unit >= numraid)
701 return (ENXIO);
702 rs = &raid_softc[unit];
703
704 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
705 (int) DISKPART(dev), (int) unit, (int) cmd));
706
707 /* Must be open for writes for these commands... */
708 switch (cmd) {
709 case DIOCSDINFO:
710 case DIOCWDINFO:
711 case DIOCWLABEL:
712 if ((flag & FWRITE) == 0)
713 return (EBADF);
714 }
715
716 /* Must be initialized for these... */
717 switch (cmd) {
718 case DIOCGDINFO:
719 case DIOCSDINFO:
720 case DIOCWDINFO:
721 case DIOCGPART:
722 case DIOCWLABEL:
723 case DIOCGDEFLABEL:
724 case RAIDFRAME_SHUTDOWN:
725 case RAIDFRAME_REWRITEPARITY:
726 case RAIDFRAME_GET_INFO:
727 case RAIDFRAME_RESET_ACCTOTALS:
728 case RAIDFRAME_GET_ACCTOTALS:
729 case RAIDFRAME_KEEP_ACCTOTALS:
730 case RAIDFRAME_GET_SIZE:
731 case RAIDFRAME_FAIL_DISK:
732 case RAIDFRAME_COPYBACK:
733 case RAIDFRAME_CHECKRECON:
734 case RAIDFRAME_GET_COMPONENT_LABEL:
735 case RAIDFRAME_SET_COMPONENT_LABEL:
736 case RAIDFRAME_ADD_HOT_SPARE:
737 case RAIDFRAME_REMOVE_HOT_SPARE:
738 case RAIDFRAME_INIT_LABELS:
739 case RAIDFRAME_REBUILD_IN_PLACE:
740 case RAIDFRAME_CHECK_PARITY:
741 if ((rs->sc_flags & RAIDF_INITED) == 0)
742 return (ENXIO);
743 }
744
745 switch (cmd) {
746
747
748 /* configure the system */
749 case RAIDFRAME_CONFIGURE:
750
751 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
752 /* copy-in the configuration information */
753 /* data points to a pointer to the configuration structure */
754 u_cfg = *((RF_Config_t **) data);
755 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
756 if (k_cfg == NULL) {
757 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
758 return (ENOMEM);
759 }
760 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
761 sizeof(RF_Config_t));
762 if (retcode) {
763 RF_Free(k_cfg, sizeof(RF_Config_t));
764 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
765 retcode));
766 return (retcode);
767 }
768 /* allocate a buffer for the layout-specific data, and copy it
769 * in */
770 if (k_cfg->layoutSpecificSize) {
771 if (k_cfg->layoutSpecificSize > 10000) {
772 /* sanity check */
773 RF_Free(k_cfg, sizeof(RF_Config_t));
774 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
775 return (EINVAL);
776 }
777 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
778 (u_char *));
779 if (specific_buf == NULL) {
780 RF_Free(k_cfg, sizeof(RF_Config_t));
781 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
782 return (ENOMEM);
783 }
784 retcode = copyin(k_cfg->layoutSpecific,
785 (caddr_t) specific_buf,
786 k_cfg->layoutSpecificSize);
787 if (retcode) {
788 RF_Free(k_cfg, sizeof(RF_Config_t));
789 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
790 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
791 retcode));
792 return (retcode);
793 }
794 } else
795 specific_buf = NULL;
796 k_cfg->layoutSpecific = specific_buf;
797
798 /* should do some kind of sanity check on the configuration.
799 * Store the sum of all the bytes in the last byte? */
800
801 /* configure the system */
802
803 raidPtrs[unit]->raidid = unit;
804
805 retcode = rf_Configure(raidPtrs[unit], k_cfg);
806
807 /* allow this many simultaneous IO's to this RAID device */
808 raidPtrs[unit]->openings = RAIDOUTSTANDING;
809
810 if (retcode == 0) {
811 retcode = raidinit(dev, raidPtrs[unit], unit);
812 rf_markalldirty( raidPtrs[unit] );
813 }
814 /* free the buffers. No return code here. */
815 if (k_cfg->layoutSpecificSize) {
816 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
817 }
818 RF_Free(k_cfg, sizeof(RF_Config_t));
819
820 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
821 retcode));
822
823 return (retcode);
824
825 /* shutdown the system */
826 case RAIDFRAME_SHUTDOWN:
827
828 if ((error = raidlock(rs)) != 0)
829 return (error);
830
831 /*
832 * If somebody has a partition mounted, we shouldn't
833 * shutdown.
834 */
835
836 part = DISKPART(dev);
837 pmask = (1 << part);
838 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
839 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
840 (rs->sc_dkdev.dk_copenmask & pmask))) {
841 raidunlock(rs);
842 return (EBUSY);
843 }
844
845 if (rf_debugKernelAccess) {
846 printf("call shutdown\n");
847 }
848
849 retcode = rf_Shutdown(raidPtrs[unit]);
850
851 db1_printf(("Done main shutdown\n"));
852
853 pool_destroy(&rs->sc_cbufpool);
854 db1_printf(("Done freeing component buffer freelist\n"));
855
856 /* It's no longer initialized... */
857 rs->sc_flags &= ~RAIDF_INITED;
858
859 /* Detach the disk. */
860 disk_detach(&rs->sc_dkdev);
861
862 raidunlock(rs);
863
864 return (retcode);
865 case RAIDFRAME_GET_COMPONENT_LABEL:
866 c_label_ptr = (RF_ComponentLabel_t **) data;
867 /* need to read the component label for the disk indicated
868 by row,column in component_label
869 XXX need to sanity check these values!!!
870 */
871
872 /* For practice, let's get it directly fromdisk, rather
873 than from the in-core copy */
874 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
875 (RF_ComponentLabel_t *));
876 if (component_label == NULL)
877 return (ENOMEM);
878
879 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
880
881 retcode = copyin( *c_label_ptr, component_label,
882 sizeof(RF_ComponentLabel_t));
883
884 if (retcode) {
885 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
886 return(retcode);
887 }
888
889 row = component_label->row;
890 column = component_label->column;
891
892 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
893 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
894 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
895 return(EINVAL);
896 }
897
898 raidread_component_label(
899 raidPtrs[unit]->Disks[row][column].dev,
900 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
901 component_label );
902
903 retcode = copyout((caddr_t) component_label,
904 (caddr_t) *c_label_ptr,
905 sizeof(RF_ComponentLabel_t));
906 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
907 return (retcode);
908
909 case RAIDFRAME_SET_COMPONENT_LABEL:
910 component_label = (RF_ComponentLabel_t *) data;
911
912 /* XXX check the label for valid stuff... */
913 /* Note that some things *should not* get modified --
914 the user should be re-initing the labels instead of
915 trying to patch things.
916 */
917
918 printf("Got component label:\n");
919 printf("Version: %d\n",component_label->version);
920 printf("Serial Number: %d\n",component_label->serial_number);
921 printf("Mod counter: %d\n",component_label->mod_counter);
922 printf("Row: %d\n", component_label->row);
923 printf("Column: %d\n", component_label->column);
924 printf("Num Rows: %d\n", component_label->num_rows);
925 printf("Num Columns: %d\n", component_label->num_columns);
926 printf("Clean: %d\n", component_label->clean);
927 printf("Status: %d\n", component_label->status);
928
929 row = component_label->row;
930 column = component_label->column;
931
932 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
933 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
934 return(EINVAL);
935 }
936
937 /* XXX this isn't allowed to do anything for now :-) */
938 #if 0
939 raidwrite_component_label(
940 raidPtrs[unit]->Disks[row][column].dev,
941 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
942 component_label );
943 #endif
944 return (0);
945
946 case RAIDFRAME_INIT_LABELS:
947 component_label = (RF_ComponentLabel_t *) data;
948 /*
949 we only want the serial number from
950 the above. We get all the rest of the information
951 from the config that was used to create this RAID
952 set.
953 */
954
955 raidPtrs[unit]->serial_number = component_label->serial_number;
956 /* current version number */
957 ci_label.version = RF_COMPONENT_LABEL_VERSION;
958 ci_label.serial_number = component_label->serial_number;
959 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
960 ci_label.num_rows = raidPtrs[unit]->numRow;
961 ci_label.num_columns = raidPtrs[unit]->numCol;
962 ci_label.clean = RF_RAID_DIRTY; /* not clean */
963 ci_label.status = rf_ds_optimal; /* "It's good!" */
964
965 for(row=0;row<raidPtrs[unit]->numRow;row++) {
966 ci_label.row = row;
967 for(column=0;column<raidPtrs[unit]->numCol;column++) {
968 ci_label.column = column;
969 raidwrite_component_label(
970 raidPtrs[unit]->Disks[row][column].dev,
971 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
972 &ci_label );
973 }
974 }
975
976 return (retcode);
977
978 /* initialize all parity */
979 case RAIDFRAME_REWRITEPARITY:
980
981 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
982 /* Parity for RAID 0 is trivially correct */
983 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
984 return(0);
985 }
986
987 /* borrow the thread of the requesting process */
988
989 s = splbio();
990 retcode = rf_RewriteParity(raidPtrs[unit]);
991 splx(s);
992 /* return I/O Error if the parity rewrite fails */
993
994 if (retcode) {
995 retcode = EIO;
996 } else {
997 /* set the clean bit! If we shutdown correctly,
998 the clean bit on each component label will get
999 set */
1000 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
1001 }
1002 return (retcode);
1003
1004
1005 case RAIDFRAME_ADD_HOT_SPARE:
1006 sparePtr = (RF_SingleComponent_t *) data;
1007 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1008 printf("Adding spare\n");
1009 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
1010 return(retcode);
1011
1012 case RAIDFRAME_REMOVE_HOT_SPARE:
1013 return(retcode);
1014
1015 case RAIDFRAME_REBUILD_IN_PLACE:
1016
1017 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1018 /* Can't do this on a RAID 0!! */
1019 return(EINVAL);
1020 }
1021
1022 componentPtr = (RF_SingleComponent_t *) data;
1023 memcpy( &component, componentPtr,
1024 sizeof(RF_SingleComponent_t));
1025 row = component.row;
1026 column = component.column;
1027 printf("Rebuild: %d %d\n",row, column);
1028 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
1029 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
1030 return(EINVAL);
1031 }
1032 printf("Attempting a rebuild in place\n");
1033 s = splbio();
1034 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1035 splx(s);
1036 return(retcode);
1037
1038 case RAIDFRAME_GET_INFO:
1039 {
1040 RF_Raid_t *raid = raidPtrs[unit];
1041 RF_DeviceConfig_t *cfg, **ucfgp;
1042 int i, j, d;
1043
1044 if (!raid->valid)
1045 return (ENODEV);
1046 ucfgp = (RF_DeviceConfig_t **) data;
1047 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1048 (RF_DeviceConfig_t *));
1049 if (cfg == NULL)
1050 return (ENOMEM);
1051 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1052 cfg->rows = raid->numRow;
1053 cfg->cols = raid->numCol;
1054 cfg->ndevs = raid->numRow * raid->numCol;
1055 if (cfg->ndevs >= RF_MAX_DISKS) {
1056 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1057 return (ENOMEM);
1058 }
1059 cfg->nspares = raid->numSpare;
1060 if (cfg->nspares >= RF_MAX_DISKS) {
1061 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1062 return (ENOMEM);
1063 }
1064 cfg->maxqdepth = raid->maxQueueDepth;
1065 d = 0;
1066 for (i = 0; i < cfg->rows; i++) {
1067 for (j = 0; j < cfg->cols; j++) {
1068 cfg->devs[d] = raid->Disks[i][j];
1069 d++;
1070 }
1071 }
1072 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1073 cfg->spares[i] = raid->Disks[0][j];
1074 }
1075 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1076 sizeof(RF_DeviceConfig_t));
1077 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1078
1079 return (retcode);
1080 }
1081 break;
1082 case RAIDFRAME_CHECK_PARITY:
1083 *(int *) data = raidPtrs[unit]->parity_good;
1084 return (0);
1085 case RAIDFRAME_RESET_ACCTOTALS:
1086 {
1087 RF_Raid_t *raid = raidPtrs[unit];
1088
1089 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1090 return (0);
1091 }
1092 break;
1093
1094 case RAIDFRAME_GET_ACCTOTALS:
1095 {
1096 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1097 RF_Raid_t *raid = raidPtrs[unit];
1098
1099 *totals = raid->acc_totals;
1100 return (0);
1101 }
1102 break;
1103
1104 case RAIDFRAME_KEEP_ACCTOTALS:
1105 {
1106 RF_Raid_t *raid = raidPtrs[unit];
1107 int *keep = (int *) data;
1108
1109 raid->keep_acc_totals = *keep;
1110 return (0);
1111 }
1112 break;
1113
1114 case RAIDFRAME_GET_SIZE:
1115 *(int *) data = raidPtrs[unit]->totalSectors;
1116 return (0);
1117
1118 #define RAIDFRAME_RECON 1
1119 /* XXX The above should probably be set somewhere else!! GO */
1120 #if RAIDFRAME_RECON > 0
1121
1122 /* fail a disk & optionally start reconstruction */
1123 case RAIDFRAME_FAIL_DISK:
1124
1125 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1126 /* Can't do this on a RAID 0!! */
1127 return(EINVAL);
1128 }
1129
1130 rr = (struct rf_recon_req *) data;
1131
1132 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1133 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1134 return (EINVAL);
1135
1136 printf("raid%d: Failing the disk: row: %d col: %d\n",
1137 unit, rr->row, rr->col);
1138
1139 /* make a copy of the recon request so that we don't rely on
1140 * the user's buffer */
1141 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1142 bcopy(rr, rrcopy, sizeof(*rr));
1143 rrcopy->raidPtr = (void *) raidPtrs[unit];
1144
1145 LOCK_RECON_Q_MUTEX();
1146 rrcopy->next = recon_queue;
1147 recon_queue = rrcopy;
1148 wakeup(&recon_queue);
1149 UNLOCK_RECON_Q_MUTEX();
1150
1151 return (0);
1152
1153 /* invoke a copyback operation after recon on whatever disk
1154 * needs it, if any */
1155 case RAIDFRAME_COPYBACK:
1156
1157 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1158 /* This makes no sense on a RAID 0!! */
1159 return(EINVAL);
1160 }
1161
1162 /* borrow the current thread to get this done */
1163
1164 s = splbio();
1165 rf_CopybackReconstructedData(raidPtrs[unit]);
1166 splx(s);
1167 return (0);
1168
1169 /* return the percentage completion of reconstruction */
1170 case RAIDFRAME_CHECKRECON:
1171 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1172 /* This makes no sense on a RAID 0 */
1173 return(EINVAL);
1174 }
1175
1176 row = *(int *) data;
1177 if (row < 0 || row >= raidPtrs[unit]->numRow)
1178 return (EINVAL);
1179 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1180 *(int *) data = 100;
1181 else
1182 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1183 return (0);
1184
1185 /* the sparetable daemon calls this to wait for the kernel to
1186 * need a spare table. this ioctl does not return until a
1187 * spare table is needed. XXX -- calling mpsleep here in the
1188 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1189 * -- I should either compute the spare table in the kernel,
1190 * or have a different -- XXX XXX -- interface (a different
1191 * character device) for delivering the table -- XXX */
1192 #if 0
1193 case RAIDFRAME_SPARET_WAIT:
1194 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1195 while (!rf_sparet_wait_queue)
1196 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1197 waitreq = rf_sparet_wait_queue;
1198 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1199 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1200
1201 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1202
1203 RF_Free(waitreq, sizeof(*waitreq));
1204 return (0);
1205
1206
1207 /* wakes up a process waiting on SPARET_WAIT and puts an error
1208 * code in it that will cause the dameon to exit */
1209 case RAIDFRAME_ABORT_SPARET_WAIT:
1210 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1211 waitreq->fcol = -1;
1212 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1213 waitreq->next = rf_sparet_wait_queue;
1214 rf_sparet_wait_queue = waitreq;
1215 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1216 wakeup(&rf_sparet_wait_queue);
1217 return (0);
1218
1219 /* used by the spare table daemon to deliver a spare table
1220 * into the kernel */
1221 case RAIDFRAME_SEND_SPARET:
1222
1223 /* install the spare table */
1224 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1225
1226 /* respond to the requestor. the return status of the spare
1227 * table installation is passed in the "fcol" field */
1228 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1229 waitreq->fcol = retcode;
1230 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1231 waitreq->next = rf_sparet_resp_queue;
1232 rf_sparet_resp_queue = waitreq;
1233 wakeup(&rf_sparet_resp_queue);
1234 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1235
1236 return (retcode);
1237 #endif
1238
1239
1240 #endif /* RAIDFRAME_RECON > 0 */
1241
1242 default:
1243 break; /* fall through to the os-specific code below */
1244
1245 }
1246
1247 if (!raidPtrs[unit]->valid)
1248 return (EINVAL);
1249
1250 /*
1251 * Add support for "regular" device ioctls here.
1252 */
1253
1254 switch (cmd) {
1255 case DIOCGDINFO:
1256 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1257 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1258 break;
1259
1260 case DIOCGPART:
1261 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1262 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1263 ((struct partinfo *) data)->part =
1264 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1265 break;
1266
1267 case DIOCWDINFO:
1268 db1_printf(("DIOCWDINFO\n"));
1269 case DIOCSDINFO:
1270 db1_printf(("DIOCSDINFO\n"));
1271 if ((error = raidlock(rs)) != 0)
1272 return (error);
1273
1274 rs->sc_flags |= RAIDF_LABELLING;
1275
1276 error = setdisklabel(rs->sc_dkdev.dk_label,
1277 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1278 if (error == 0) {
1279 if (cmd == DIOCWDINFO)
1280 error = writedisklabel(RAIDLABELDEV(dev),
1281 raidstrategy, rs->sc_dkdev.dk_label,
1282 rs->sc_dkdev.dk_cpulabel);
1283 }
1284 rs->sc_flags &= ~RAIDF_LABELLING;
1285
1286 raidunlock(rs);
1287
1288 if (error)
1289 return (error);
1290 break;
1291
1292 case DIOCWLABEL:
1293 db1_printf(("DIOCWLABEL\n"));
1294 if (*(int *) data != 0)
1295 rs->sc_flags |= RAIDF_WLABEL;
1296 else
1297 rs->sc_flags &= ~RAIDF_WLABEL;
1298 break;
1299
1300 case DIOCGDEFLABEL:
1301 db1_printf(("DIOCGDEFLABEL\n"));
1302 raidgetdefaultlabel(raidPtrs[unit], rs,
1303 (struct disklabel *) data);
1304 break;
1305
1306 default:
1307 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1308 }
1309 return (retcode);
1310
1311 }
1312
1313
1314 /* raidinit -- complete the rest of the initialization for the
1315 RAIDframe device. */
1316
1317
static int
raidinit(dev, raidPtr, unit)
	dev_t dev;
	RF_Raid_t *raidPtr;
	int unit;
{
	/*
	 * Complete the softc-level initialization for a newly configured
	 * RAID set: create the per-unit raidbuf pool, attach the disk(9)
	 * structures, and record the device and its size.  Currently
	 * always returns 0.
	 */
	int retcode;
	/* int ix; */
	/* struct raidbuf *raidbp; */
	struct raid_softc *rs;

	retcode = 0;

	rs = &raid_softc[unit];
	/* pool of raidbuf wrappers consumed by rf_DispatchKernelIO() */
	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
	    0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds
						 * of sc_xname. */

	rs->sc_dkdev.dk_name = rs->sc_xname;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe. */

	rs->sc_size = raidPtr->totalSectors;
	rs->sc_dev = dev;

	return (retcode);
}
1357
1358 /*
1359 * This kernel thread never exits. It is created once, and persists
1360 * until the system reboots.
1361 */
1362
void
rf_ReconKernelThread()
{
	/*
	 * Kernel thread servicing the reconstruction request queue.
	 * Requests are enqueued (and the queue woken) by the
	 * RAIDFRAME_FAIL_DISK ioctl; each is handed to rf_FailDisk(),
	 * which, when RF_FDFLAGS_RECON is set, does not return until
	 * reconstruction completes, fails, or is aborted.  This thread
	 * is created once and never exits.
	 */
	struct rf_recon_req *req;
	int s;

	/* XXX not sure what spl() level we should be at here... probably
	 * splbio() */
	s = splbio();

	while (1) {
		/* grab the next reconstruction request from the queue,
		 * sleeping while it is empty */
		LOCK_RECON_Q_MUTEX();
		while (!recon_queue) {
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO,
			    "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
		 * If flags specifies that we should start recon, this call
		 * will not return until reconstruction completes, fails,
		 * or is aborted.
		 */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

		/* the request was malloc'd by the FAIL_DISK ioctl; we own
		 * it now and must free it */
		RF_Free(req, sizeof(*req));
	}
}
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	/*
	 * Hand req to the user-level sparetable daemon by queueing it on
	 * rf_sparet_wait_queue and waking the daemon, then sleep until a
	 * response appears on rf_sparet_resp_queue.  Returns the "fcol"
	 * status carried in the response entry.
	 */
	int retcode;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/*
	 * Wait for a response to show up.  NOTE(review): an older comment
	 * here claimed "mpsleep unlocks the mutex", but tsleep(9) does not
	 * touch rf_sparet_wait_mutex -- confirm whether holding it across
	 * this sleep is intended (or whether the mutex is a no-op here).
	 */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* from here on, req points at the *response* entry, not the
	 * caller's request */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
/* A wrapper around rf_DoAccess that extracts the appropriate info from
 * the bp and passes it down.
 * Any calls originating in the kernel must use non-blocking I/O.
 * Does some extra sanity checking to return "appropriate" error values
 * for certain conditions (to make some standard utilities work).
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(raidPtr)
	RF_Raid_t *raidPtr;
{
	/*
	 * Pull buffers off the per-unit queue and hand them to RAIDframe
	 * via rf_DoAccess() as non-blocking (async) I/O, for as long as
	 * the array has openings available.  Requests that run past the
	 * end of the array or are not sector-aligned are completed
	 * immediately with an error.
	 */
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	struct buf *dp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Check to see if we're at the limit... */
	RF_LOCK_MUTEX(raidPtr->mutex);
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		dp = &rs->buf_queue;
		bp = dp->b_actf;
		if (bp == NULL) {
			/* nothing more to do */
			return;
		}

		/* unlink bp from the b_actf/b_actb doubly-linked queue */
		dp = bp->b_actf;
		if (dp != NULL) {
			dp->b_actb = bp->b_actb;
		} else {
			rs->buf_queue.b_actb = bp->b_actb;
		}
		*bp->b_actb = dp;

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/* reject requests that run off the end of the array; the
		 * "sum < ..." comparisons also catch arithmetic wraparound */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that are not a whole number of sectors */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* consume one opening; it is returned by the completion
		 * path once the access finishes */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* NOTE(review): retcode from rf_DoAccess is ignored here */
		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
		    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
		    do_async, raid_addr, num_blocks,
		    bp->b_un.b_addr, bp, NULL, NULL,
		    RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);


		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1556
1557
1558
1559
1560 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1561
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	/*
	 * Issue a single component I/O described by req on behalf of
	 * RAIDframe.  The disk queue should be locked upon entry.  The
	 * request is wrapped in a raidbuf from the per-unit pool and
	 * handed to the component's strategy routine; completion is
	 * reported asynchronously through KernelWakeupFunc().  Always
	 * returns 0.
	 */
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	/* clear any stale error state left in the caller's buffer */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* complete immediately -- no real I/O is performed */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		/* start the per-request timer, if tracing is enabled */
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* fill in the raidbuf's buf for the component transfer;
		 * KernelWakeupFunc fires from biodone() */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must bump the vnode's output counter */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
/* this is the callback function associated with an I/O invoked from
   kernel code.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	/*
	 * Completion callback for component I/O dispatched by
	 * rf_DispatchKernelIO() (runs via biodone()).  Propagates error
	 * state to the original buffer, updates tracing and disk
	 * accounting, marks the component failed on I/O error, returns
	 * the raidbuf wrapper to the pool, and notifies RAIDframe that
	 * this queue entry is complete.
	 */
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate any component error to the original buffer */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* fold the elapsed time into the per-request trace record */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* return the raidbuf wrapper to the per-unit pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* tell RAIDframe this queue entry is done; CompleteFunc drives
	 * the rest of the DAG for this access */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);

	splx(s);		/* XXX */
}
1777
1778
1779
1780 /*
1781 * initialize a buf structure for doing an I/O in the kernel.
1782 */
1783 static void
1784 InitBP(
1785 struct buf * bp,
1786 struct vnode * b_vp,
1787 unsigned rw_flag,
1788 dev_t dev,
1789 RF_SectorNum_t startSect,
1790 RF_SectorCount_t numSect,
1791 caddr_t buf,
1792 void (*cbFunc) (struct buf *),
1793 void *cbArg,
1794 int logBytesPerSector,
1795 struct proc * b_proc)
1796 {
1797 /* bp->b_flags = B_PHYS | rw_flag; */
1798 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1799 bp->b_bcount = numSect << logBytesPerSector;
1800 bp->b_bufsize = bp->b_bcount;
1801 bp->b_error = 0;
1802 bp->b_dev = dev;
1803 db1_printf(("bp->b_dev is %d\n", dev));
1804 bp->b_un.b_addr = buf;
1805 #if 0
1806 db1_printf(("bp->b_data=0x%x\n", bp->b_data));
1807 #endif
1808 bp->b_blkno = startSect;
1809 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1810 db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
1811 if (bp->b_bcount == 0) {
1812 panic("bp->b_bcount is zero in InitBP!!\n");
1813 }
1814 bp->b_proc = b_proc;
1815 bp->b_iodone = cbFunc;
1816 bp->b_vp = b_vp;
1817
1818 }
1819
1820 static void
1821 raidgetdefaultlabel(raidPtr, rs, lp)
1822 RF_Raid_t *raidPtr;
1823 struct raid_softc *rs;
1824 struct disklabel *lp;
1825 {
1826 db1_printf(("Building a default label...\n"));
1827 bzero(lp, sizeof(*lp));
1828
1829 /* fabricate a label... */
1830 lp->d_secperunit = raidPtr->totalSectors;
1831 lp->d_secsize = raidPtr->bytesPerSector;
1832 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1833 lp->d_ntracks = 1;
1834 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1835 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1836
1837 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1838 lp->d_type = DTYPE_RAID;
1839 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1840 lp->d_rpm = 3600;
1841 lp->d_interleave = 1;
1842 lp->d_flags = 0;
1843
1844 lp->d_partitions[RAW_PART].p_offset = 0;
1845 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1846 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1847 lp->d_npartitions = RAW_PART + 1;
1848
1849 lp->d_magic = DISKMAGIC;
1850 lp->d_magic2 = DISKMAGIC;
1851 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1852
1853 }
1854 /*
1855 * Read the disklabel from the raid device. If one is not present, fake one
1856 * up.
1857 */
static void
raidgetdisklabel(dev)
	dev_t dev;
{
	/*
	 * Read the disklabel from the raid device into the softc's
	 * dk_label.  A default label is built first so readdisklabel()
	 * has sane geometry to work from; if no on-disk label is found,
	 * raidmakedisklabel() fakes one up.  Warns (but does not fail)
	 * if the found label disagrees with the actual array size.
	 */
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since the total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * the same components are used, and an old disklabel may be
		 * used if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
1911 /*
1912 * Take care of things one might want to take care of in the event
1913 * that a disklabel isn't present.
1914 */
1915 static void
1916 raidmakedisklabel(rs)
1917 struct raid_softc *rs;
1918 {
1919 struct disklabel *lp = rs->sc_dkdev.dk_label;
1920 db1_printf(("Making a label..\n"));
1921
1922 /*
1923 * For historical reasons, if there's no disklabel present
1924 * the raw partition must be marked FS_BSDFFS.
1925 */
1926
1927 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1928
1929 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1930
1931 lp->d_checksum = dkcksum(lp);
1932 }
1933 /*
1934 * Lookup the provided name in the filesystem. If the file exists,
1935 * is a valid block device, and isn't being used by anyone else,
1936 * set *vpp to the file's vnode.
1937 * You'll find the original of this in ccd.c
1938 */
1939 int
1940 raidlookup(path, p, vpp)
1941 char *path;
1942 struct proc *p;
1943 struct vnode **vpp; /* result */
1944 {
1945 struct nameidata nd;
1946 struct vnode *vp;
1947 struct vattr va;
1948 int error;
1949
1950 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1951 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1952 #ifdef DEBUG
1953 printf("RAIDframe: vn_open returned %d\n", error);
1954 #endif
1955 return (error);
1956 }
1957 vp = nd.ni_vp;
1958 if (vp->v_usecount > 1) {
1959 VOP_UNLOCK(vp, 0);
1960 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1961 return (EBUSY);
1962 }
1963 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1964 VOP_UNLOCK(vp, 0);
1965 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1966 return (error);
1967 }
1968 /* XXX: eventually we should handle VREG, too. */
1969 if (va.va_type != VBLK) {
1970 VOP_UNLOCK(vp, 0);
1971 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1972 return (ENOTBLK);
1973 }
1974 VOP_UNLOCK(vp, 0);
1975 *vpp = vp;
1976 return (0);
1977 }
1978 /*
1979 * Wait interruptibly for an exclusive lock.
1980 *
1981 * XXX
1982 * Several drivers do this; it should be abstracted and made MP-safe.
1983 * (Hmm... where have we seen this warning before :-> GO )
1984 */
1985 static int
1986 raidlock(rs)
1987 struct raid_softc *rs;
1988 {
1989 int error;
1990
1991 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1992 rs->sc_flags |= RAIDF_WANTED;
1993 if ((error =
1994 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1995 return (error);
1996 }
1997 rs->sc_flags |= RAIDF_LOCKED;
1998 return (0);
1999 }
2000 /*
2001 * Unlock and wake up any waiters.
2002 */
2003 static void
2004 raidunlock(rs)
2005 struct raid_softc *rs;
2006 {
2007
2008 rs->sc_flags &= ~RAIDF_LOCKED;
2009 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2010 rs->sc_flags &= ~RAIDF_WANTED;
2011 wakeup(rs);
2012 }
2013 }
2014
2015
2016 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2017 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2018
2019 int
2020 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2021 {
2022 RF_ComponentLabel_t component_label;
2023 raidread_component_label(dev, b_vp, &component_label);
2024 component_label.mod_counter = mod_counter;
2025 component_label.clean = RF_RAID_CLEAN;
2026 raidwrite_component_label(dev, b_vp, &component_label);
2027 return(0);
2028 }
2029
2030
2031 int
2032 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2033 {
2034 RF_ComponentLabel_t component_label;
2035 raidread_component_label(dev, b_vp, &component_label);
2036 component_label.mod_counter = mod_counter;
2037 component_label.clean = RF_RAID_DIRTY;
2038 raidwrite_component_label(dev, b_vp, &component_label);
2039 return(0);
2040 }
2041
2042 /* ARGSUSED */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	/*
	 * Read the RAIDframe component label from the reserved area at
	 * RF_COMPONENT_INFO_OFFSET on the given component device into
	 * *component_label.  Returns 0 on success, or the error from
	 * biowait().  (b_vp is unused here -- hence ARGSUSED.)
	 */
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_resid is set in DEV_BSIZE units here while
	 * b_bcount is in bytes -- looks inconsistent; confirm intent. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* issue the read directly via the component's strategy routine
	 * and wait synchronously for it to finish */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* discard the buffer rather than caching stale label data */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
2092 /* ARGSUSED */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	/*
	 * Write *component_label to the reserved label area at
	 * RF_COMPONENT_INFO_OFFSET on the given component device.  The
	 * rest of the RF_COMPONENT_INFO_SIZE area is zero-filled.
	 * Returns 0 on success, or the error from biowait().
	 * (b_vp is unused here -- hence ARGSUSED.)
	 */
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero-fill, then copy the label into the front of the area */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* issue the write directly via the component's strategy routine
	 * and wait synchronously for it to finish */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* discard the buffer rather than caching it */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2126
2127 void
2128 rf_markalldirty( raidPtr )
2129 RF_Raid_t *raidPtr;
2130 {
2131 RF_ComponentLabel_t c_label;
2132 int r,c;
2133
2134 raidPtr->mod_counter++;
2135 for (r = 0; r < raidPtr->numRow; r++) {
2136 for (c = 0; c < raidPtr->numCol; c++) {
2137 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2138 raidread_component_label(
2139 raidPtr->Disks[r][c].dev,
2140 raidPtr->raid_cinfo[r][c].ci_vp,
2141 &c_label);
2142 if (c_label.status == rf_ds_spared) {
2143 /* XXX do something special...
2144 but whatever you do, don't
2145 try to access it!! */
2146 } else {
2147 #if 0
2148 c_label.status =
2149 raidPtr->Disks[r][c].status;
2150 raidwrite_component_label(
2151 raidPtr->Disks[r][c].dev,
2152 raidPtr->raid_cinfo[r][c].ci_vp,
2153 &c_label);
2154 #endif
2155 raidmarkdirty(
2156 raidPtr->Disks[r][c].dev,
2157 raidPtr->raid_cinfo[r][c].ci_vp,
2158 raidPtr->mod_counter);
2159 }
2160 }
2161 }
2162 }
2163 /* printf("Component labels marked dirty.\n"); */
2164 #if 0
2165 for( c = 0; c < raidPtr->numSpare ; c++) {
2166 sparecol = raidPtr->numCol + c;
2167 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2168 /*
2169
2170 XXX this is where we get fancy and map this spare
2171 into it's correct spot in the array.
2172
2173 */
2174 /*
2175
2176 we claim this disk is "optimal" if it's
2177 rf_ds_used_spare, as that means it should be
2178 directly substitutable for the disk it replaced.
2179 We note that too...
2180
2181 */
2182
2183 for(i=0;i<raidPtr->numRow;i++) {
2184 for(j=0;j<raidPtr->numCol;j++) {
2185 if ((raidPtr->Disks[i][j].spareRow ==
2186 r) &&
2187 (raidPtr->Disks[i][j].spareCol ==
2188 sparecol)) {
2189 srow = r;
2190 scol = sparecol;
2191 break;
2192 }
2193 }
2194 }
2195
2196 raidread_component_label(
2197 raidPtr->Disks[r][sparecol].dev,
2198 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2199 &c_label);
2200 /* make sure status is noted */
2201 c_label.version = RF_COMPONENT_LABEL_VERSION;
2202 c_label.mod_counter = raidPtr->mod_counter;
2203 c_label.serial_number = raidPtr->serial_number;
2204 c_label.row = srow;
2205 c_label.column = scol;
2206 c_label.num_rows = raidPtr->numRow;
2207 c_label.num_columns = raidPtr->numCol;
2208 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2209 c_label.status = rf_ds_optimal;
2210 raidwrite_component_label(
2211 raidPtr->Disks[r][sparecol].dev,
2212 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2213 &c_label);
2214 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2215 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2216 }
2217 }
2218
2219 #endif
2220 }
2221
2222
2223 void
2224 rf_update_component_labels( raidPtr )
2225 RF_Raid_t *raidPtr;
2226 {
2227 RF_ComponentLabel_t c_label;
2228 int sparecol;
2229 int r,c;
2230 int i,j;
2231 int srow, scol;
2232
2233 srow = -1;
2234 scol = -1;
2235
2236 /* XXX should do extra checks to make sure things really are clean,
2237 rather than blindly setting the clean bit... */
2238
2239 raidPtr->mod_counter++;
2240
2241 for (r = 0; r < raidPtr->numRow; r++) {
2242 for (c = 0; c < raidPtr->numCol; c++) {
2243 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2244 raidread_component_label(
2245 raidPtr->Disks[r][c].dev,
2246 raidPtr->raid_cinfo[r][c].ci_vp,
2247 &c_label);
2248 /* make sure status is noted */
2249 c_label.status = rf_ds_optimal;
2250 raidwrite_component_label(
2251 raidPtr->Disks[r][c].dev,
2252 raidPtr->raid_cinfo[r][c].ci_vp,
2253 &c_label);
2254 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2255 raidmarkclean(
2256 raidPtr->Disks[r][c].dev,
2257 raidPtr->raid_cinfo[r][c].ci_vp,
2258 raidPtr->mod_counter);
2259 }
2260 }
2261 /* else we don't touch it.. */
2262 #if 0
2263 else if (raidPtr->Disks[r][c].status !=
2264 rf_ds_failed) {
2265 raidread_component_label(
2266 raidPtr->Disks[r][c].dev,
2267 raidPtr->raid_cinfo[r][c].ci_vp,
2268 &c_label);
2269 /* make sure status is noted */
2270 c_label.status =
2271 raidPtr->Disks[r][c].status;
2272 raidwrite_component_label(
2273 raidPtr->Disks[r][c].dev,
2274 raidPtr->raid_cinfo[r][c].ci_vp,
2275 &c_label);
2276 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2277 raidmarkclean(
2278 raidPtr->Disks[r][c].dev,
2279 raidPtr->raid_cinfo[r][c].ci_vp,
2280 raidPtr->mod_counter);
2281 }
2282 }
2283 #endif
2284 }
2285 }
2286
2287 for( c = 0; c < raidPtr->numSpare ; c++) {
2288 sparecol = raidPtr->numCol + c;
2289 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2290 /*
2291
2292 we claim this disk is "optimal" if it's
2293 rf_ds_used_spare, as that means it should be
2294 directly substitutable for the disk it replaced.
2295 We note that too...
2296
2297 */
2298
2299 for(i=0;i<raidPtr->numRow;i++) {
2300 for(j=0;j<raidPtr->numCol;j++) {
2301 if ((raidPtr->Disks[i][j].spareRow ==
2302 0) &&
2303 (raidPtr->Disks[i][j].spareCol ==
2304 sparecol)) {
2305 srow = i;
2306 scol = j;
2307 break;
2308 }
2309 }
2310 }
2311
2312 raidread_component_label(
2313 raidPtr->Disks[0][sparecol].dev,
2314 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2315 &c_label);
2316 /* make sure status is noted */
2317 c_label.version = RF_COMPONENT_LABEL_VERSION;
2318 c_label.mod_counter = raidPtr->mod_counter;
2319 c_label.serial_number = raidPtr->serial_number;
2320 c_label.row = srow;
2321 c_label.column = scol;
2322 c_label.num_rows = raidPtr->numRow;
2323 c_label.num_columns = raidPtr->numCol;
2324 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2325 c_label.status = rf_ds_optimal;
2326 raidwrite_component_label(
2327 raidPtr->Disks[0][sparecol].dev,
2328 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2329 &c_label);
2330 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2331 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2332 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2333 raidPtr->mod_counter);
2334 }
2335 }
2336 }
2337 /* printf("Component labels updated\n"); */
2338 }
2339