rf_netbsdkintf.c revision 1.26 1 /* $NetBSD: rf_netbsdkintf.c,v 1.26 1999/08/14 02:41:36 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
154 int rf_kdebug_level = 0;
155
156 #define RFK_BOOT_NONE 0
157 #define RFK_BOOT_GOOD 1
158 #define RFK_BOOT_BAD 2
159 static int rf_kbooted = RFK_BOOT_NONE;
160
161 #ifdef DEBUG
162 #define db0_printf(a) printf a
163 #define db_printf(a) if (rf_kdebug_level > 0) printf a
164 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
165 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
166 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
167 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
168 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
169 #else /* DEBUG */
170 #define db0_printf(a) printf a
171 #define db1_printf(a) { }
172 #define db2_printf(a) { }
173 #define db3_printf(a) { }
174 #define db4_printf(a) { }
175 #define db5_printf(a) { }
176 #endif /* DEBUG */
177
178 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
179
180 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
181
182 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
183 * spare table */
184 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
185 * installation process */
186
187 static struct rf_recon_req *recon_queue = NULL; /* used to communicate
188 * reconstruction
189 * requests */
190
191
192 decl_simple_lock_data(, recon_queue_mutex)
193 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
194 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
195
196 /* prototypes */
197 static void KernelWakeupFunc(struct buf * bp);
198 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
199 dev_t dev, RF_SectorNum_t startSect,
200 RF_SectorCount_t numSect, caddr_t buf,
201 void (*cbFunc) (struct buf *), void *cbArg,
202 int logBytesPerSector, struct proc * b_proc);
203
204 #define Dprintf0(s) if (rf_queueDebug) \
205 rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
206 #define Dprintf1(s,a) if (rf_queueDebug) \
207 rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
208 #define Dprintf2(s,a,b) if (rf_queueDebug) \
209 rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
210 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
211 rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
212
213 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
214 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
215
216 void raidattach __P((int));
217 int raidsize __P((dev_t));
218
219 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
220 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
221 static int raidinit __P((dev_t, RF_Raid_t *, int));
222
223 int raidopen __P((dev_t, int, int, struct proc *));
224 int raidclose __P((dev_t, int, int, struct proc *));
225 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
226 int raidwrite __P((dev_t, struct uio *, int));
227 int raidread __P((dev_t, struct uio *, int));
228 void raidstrategy __P((struct buf *));
229 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
230
231 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
232 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
233 void rf_update_component_labels( RF_Raid_t *);
234 /*
235 * Pilfered from ccd.c
236 */
237
/*
 * Per-component I/O descriptor, allocated from the unit's sc_cbufpool
 * (see RAIDGETBUF/RAIDPUTBUF) for each low-level I/O issued on behalf
 * of a RAID transfer.  Modelled on the equivalent structure in ccd(4).
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int rf_flags;		/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
244
245
246 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
247 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
248
249 /* XXX Not sure if the following should be replacing the raidPtrs above,
250 or if it should be used in conjunction with that... */
251
/*
 * Per-unit software state, one entry per configured RAID device in the
 * global raid_softc[] array (indexed by raidunit(dev)).  Holds the
 * generic disk(9) state, the open/lock flags, and the component buffer
 * pool used by the strategy path.
 */
struct raid_softc {
	int sc_flags;		/* flags (RAIDF_*, see below) */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	dev_t sc_dev;		/* our device.. */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
};
261 /* sc_flags */
262 #define RAIDF_INITED 0x01 /* unit has been initialized */
263 #define RAIDF_WLABEL 0x02 /* label area is writable */
264 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
265 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
266 #define RAIDF_LOCKED 0x80 /* unit is locked */
267
268 #define raidunit(x) DISKUNIT(x)
269 static int numraid = 0;
270
271 /*
272 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
273 * Be aware that large numbers can allow the driver to consume a lot of
274 * kernel memory, especially on writes...
275 */
276
277 #ifndef RAIDOUTSTANDING
278 #define RAIDOUTSTANDING 10
279 #endif
280
281 #define RAIDLABELDEV(dev) \
282 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
283
284 /* declared here, and made public, for the benefit of KVM stuff.. */
285 struct raid_softc *raid_softc;
286
287 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
288 struct disklabel *));
289 static void raidgetdisklabel __P((dev_t));
290 static void raidmakedisklabel __P((struct raid_softc *));
291
292 static int raidlock __P((struct raid_softc *));
293 static void raidunlock __P((struct raid_softc *));
294 int raidlookup __P((char *, struct proc * p, struct vnode **));
295
296 static void rf_markalldirty __P((RF_Raid_t *));
297
298 void
299 raidattach(num)
300 int num;
301 {
302 int raidID;
303 int i, rc;
304
305 #ifdef DEBUG
306 printf("raidattach: Asked for %d units\n", num);
307 #endif
308
309 if (num <= 0) {
310 #ifdef DIAGNOSTIC
311 panic("raidattach: count <= 0");
312 #endif
313 return;
314 }
315 /* This is where all the initialization stuff gets done. */
316
317 /* Make some space for requested number of units... */
318
319 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
320 if (raidPtrs == NULL) {
321 panic("raidPtrs is NULL!!\n");
322 }
323
324 rc = rf_mutex_init(&rf_sparet_wait_mutex);
325 if (rc) {
326 RF_PANIC();
327 }
328
329 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
330 recon_queue = NULL;
331
332 for (i = 0; i < numraid; i++)
333 raidPtrs[i] = NULL;
334 rc = rf_BootRaidframe();
335 if (rc == 0)
336 printf("Kernelized RAIDframe activated\n");
337 else
338 panic("Serious error booting RAID!!\n");
339
340 rf_kbooted = RFK_BOOT_GOOD;
341
342 /* put together some datastructures like the CCD device does.. This
343 * lets us lock the device and what-not when it gets opened. */
344
345 raid_softc = (struct raid_softc *)
346 malloc(num * sizeof(struct raid_softc),
347 M_RAIDFRAME, M_NOWAIT);
348 if (raid_softc == NULL) {
349 printf("WARNING: no memory for RAIDframe driver\n");
350 return;
351 }
352 numraid = num;
353 bzero(raid_softc, num * sizeof(struct raid_softc));
354
355 for (raidID = 0; raidID < num; raidID++) {
356 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
357 (RF_Raid_t *));
358 if (raidPtrs[raidID] == NULL) {
359 printf("raidPtrs[%d] is NULL\n", raidID);
360 }
361 }
362 }
363
364
365 int
366 raidsize(dev)
367 dev_t dev;
368 {
369 struct raid_softc *rs;
370 struct disklabel *lp;
371 int part, unit, omask, size;
372
373 unit = raidunit(dev);
374 if (unit >= numraid)
375 return (-1);
376 rs = &raid_softc[unit];
377
378 if ((rs->sc_flags & RAIDF_INITED) == 0)
379 return (-1);
380
381 part = DISKPART(dev);
382 omask = rs->sc_dkdev.dk_openmask & (1 << part);
383 lp = rs->sc_dkdev.dk_label;
384
385 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
386 return (-1);
387
388 if (lp->d_partitions[part].p_fstype != FS_SWAP)
389 size = -1;
390 else
391 size = lp->d_partitions[part].p_size *
392 (lp->d_secsize / DEV_BSIZE);
393
394 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
395 return (-1);
396
397 return (size);
398
399 }
400
401 int
402 raiddump(dev, blkno, va, size)
403 dev_t dev;
404 daddr_t blkno;
405 caddr_t va;
406 size_t size;
407 {
408 /* Not implemented. */
409 return ENXIO;
410 }
411 /* ARGSUSED */
412 int
413 raidopen(dev, flags, fmt, p)
414 dev_t dev;
415 int flags, fmt;
416 struct proc *p;
417 {
418 int unit = raidunit(dev);
419 struct raid_softc *rs;
420 struct disklabel *lp;
421 int part, pmask;
422 int error = 0;
423
424 if (unit >= numraid)
425 return (ENXIO);
426 rs = &raid_softc[unit];
427
428 if ((error = raidlock(rs)) != 0)
429 return (error);
430 lp = rs->sc_dkdev.dk_label;
431
432 part = DISKPART(dev);
433 pmask = (1 << part);
434
435 db1_printf(("Opening raid device number: %d partition: %d\n",
436 unit, part));
437
438
439 if ((rs->sc_flags & RAIDF_INITED) &&
440 (rs->sc_dkdev.dk_openmask == 0))
441 raidgetdisklabel(dev);
442
443 /* make sure that this partition exists */
444
445 if (part != RAW_PART) {
446 db1_printf(("Not a raw partition..\n"));
447 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
448 ((part >= lp->d_npartitions) ||
449 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
450 error = ENXIO;
451 raidunlock(rs);
452 db1_printf(("Bailing out...\n"));
453 return (error);
454 }
455 }
456 /* Prevent this unit from being unconfigured while open. */
457 switch (fmt) {
458 case S_IFCHR:
459 rs->sc_dkdev.dk_copenmask |= pmask;
460 break;
461
462 case S_IFBLK:
463 rs->sc_dkdev.dk_bopenmask |= pmask;
464 break;
465 }
466
467 if ((rs->sc_dkdev.dk_openmask == 0) &&
468 ((rs->sc_flags & RAIDF_INITED) != 0)) {
469 /* First one... mark things as dirty... Note that we *MUST*
470 have done a configure before this. I DO NOT WANT TO BE
471 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
472 THAT THEY BELONG TOGETHER!!!!! */
473 /* XXX should check to see if we're only open for reading
474 here... If so, we needn't do this, but then need some
475 other way of keeping track of what's happened.. */
476
477 rf_markalldirty( raidPtrs[unit] );
478 }
479
480
481 rs->sc_dkdev.dk_openmask =
482 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
483
484 raidunlock(rs);
485
486 return (error);
487
488
489 }
490 /* ARGSUSED */
491 int
492 raidclose(dev, flags, fmt, p)
493 dev_t dev;
494 int flags, fmt;
495 struct proc *p;
496 {
497 int unit = raidunit(dev);
498 struct raid_softc *rs;
499 int error = 0;
500 int part;
501
502 if (unit >= numraid)
503 return (ENXIO);
504 rs = &raid_softc[unit];
505
506 if ((error = raidlock(rs)) != 0)
507 return (error);
508
509 part = DISKPART(dev);
510
511 /* ...that much closer to allowing unconfiguration... */
512 switch (fmt) {
513 case S_IFCHR:
514 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
515 break;
516
517 case S_IFBLK:
518 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
519 break;
520 }
521 rs->sc_dkdev.dk_openmask =
522 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
523
524 if ((rs->sc_dkdev.dk_openmask == 0) &&
525 ((rs->sc_flags & RAIDF_INITED) != 0)) {
526 /* Last one... device is not unconfigured yet.
527 Device shutdown has taken care of setting the
528 clean bits if RAIDF_INITED is not set
529 mark things as clean... */
530 rf_update_component_labels( raidPtrs[unit] );
531 }
532
533 raidunlock(rs);
534 return (0);
535
536 }
537
538 void
539 raidstrategy(bp)
540 register struct buf *bp;
541 {
542 register int s;
543
544 unsigned int raidID = raidunit(bp->b_dev);
545 RF_Raid_t *raidPtr;
546 struct raid_softc *rs = &raid_softc[raidID];
547 struct disklabel *lp;
548 int wlabel;
549
550 #if 0
551 db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
552 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
553 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
554 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
555 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
556
557 if (bp->b_flags & B_READ)
558 db1_printf(("READ\n"));
559 else
560 db1_printf(("WRITE\n"));
561 #endif
562 if (rf_kbooted != RFK_BOOT_GOOD)
563 return;
564 if (raidID >= numraid || !raidPtrs[raidID]) {
565 bp->b_error = ENODEV;
566 bp->b_flags |= B_ERROR;
567 bp->b_resid = bp->b_bcount;
568 biodone(bp);
569 return;
570 }
571 raidPtr = raidPtrs[raidID];
572 if (!raidPtr->valid) {
573 bp->b_error = ENODEV;
574 bp->b_flags |= B_ERROR;
575 bp->b_resid = bp->b_bcount;
576 biodone(bp);
577 return;
578 }
579 if (bp->b_bcount == 0) {
580 db1_printf(("b_bcount is zero..\n"));
581 biodone(bp);
582 return;
583 }
584 lp = rs->sc_dkdev.dk_label;
585
586 /*
587 * Do bounds checking and adjust transfer. If there's an
588 * error, the bounds check will flag that for us.
589 */
590
591 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
592 if (DISKPART(bp->b_dev) != RAW_PART)
593 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
594 db1_printf(("Bounds check failed!!:%d %d\n",
595 (int) bp->b_blkno, (int) wlabel));
596 biodone(bp);
597 return;
598 }
599 s = splbio(); /* XXX Needed? */
600 db1_printf(("Beginning strategy...\n"));
601
602 bp->b_resid = 0;
603 bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
604 NULL, NULL, NULL);
605 if (bp->b_error) {
606 bp->b_flags |= B_ERROR;
607 db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
608 bp->b_error));
609 }
610 splx(s);
611 #if 0
612 db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
613 bp, bp->b_data,
614 (int) bp->b_bcount, (int) bp->b_resid));
615 #endif
616 }
617 /* ARGSUSED */
618 int
619 raidread(dev, uio, flags)
620 dev_t dev;
621 struct uio *uio;
622 int flags;
623 {
624 int unit = raidunit(dev);
625 struct raid_softc *rs;
626 int part;
627
628 if (unit >= numraid)
629 return (ENXIO);
630 rs = &raid_softc[unit];
631
632 if ((rs->sc_flags & RAIDF_INITED) == 0)
633 return (ENXIO);
634 part = DISKPART(dev);
635
636 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
637
638 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
639
640 }
641 /* ARGSUSED */
642 int
643 raidwrite(dev, uio, flags)
644 dev_t dev;
645 struct uio *uio;
646 int flags;
647 {
648 int unit = raidunit(dev);
649 struct raid_softc *rs;
650
651 if (unit >= numraid)
652 return (ENXIO);
653 rs = &raid_softc[unit];
654
655 if ((rs->sc_flags & RAIDF_INITED) == 0)
656 return (ENXIO);
657 db1_printf(("raidwrite\n"));
658 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
659
660 }
661
662 int
663 raidioctl(dev, cmd, data, flag, p)
664 dev_t dev;
665 u_long cmd;
666 caddr_t data;
667 int flag;
668 struct proc *p;
669 {
670 int unit = raidunit(dev);
671 int error = 0;
672 int part, pmask;
673 struct raid_softc *rs;
674 #if 0
675 int r, c;
676 #endif
677 /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
678
679 /* struct ccdbuf *cbp; */
680 /* struct raidbuf *raidbp; */
681 RF_Config_t *k_cfg, *u_cfg;
682 u_char *specific_buf;
683 int retcode = 0;
684 int row;
685 int column;
686 int s;
687 struct rf_recon_req *rrcopy, *rr;
688 RF_ComponentLabel_t *component_label;
689 RF_ComponentLabel_t ci_label;
690 RF_ComponentLabel_t **c_label_ptr;
691 RF_SingleComponent_t *sparePtr,*componentPtr;
692 RF_SingleComponent_t hot_spare;
693 RF_SingleComponent_t component;
694
695 if (unit >= numraid)
696 return (ENXIO);
697 rs = &raid_softc[unit];
698
699 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
700 (int) DISKPART(dev), (int) unit, (int) cmd));
701
702 /* Must be open for writes for these commands... */
703 switch (cmd) {
704 case DIOCSDINFO:
705 case DIOCWDINFO:
706 case DIOCWLABEL:
707 if ((flag & FWRITE) == 0)
708 return (EBADF);
709 }
710
711 /* Must be initialized for these... */
712 switch (cmd) {
713 case DIOCGDINFO:
714 case DIOCSDINFO:
715 case DIOCWDINFO:
716 case DIOCGPART:
717 case DIOCWLABEL:
718 case DIOCGDEFLABEL:
719 case RAIDFRAME_SHUTDOWN:
720 case RAIDFRAME_REWRITEPARITY:
721 case RAIDFRAME_GET_INFO:
722 case RAIDFRAME_RESET_ACCTOTALS:
723 case RAIDFRAME_GET_ACCTOTALS:
724 case RAIDFRAME_KEEP_ACCTOTALS:
725 case RAIDFRAME_GET_SIZE:
726 case RAIDFRAME_FAIL_DISK:
727 case RAIDFRAME_COPYBACK:
728 case RAIDFRAME_CHECKRECON:
729 case RAIDFRAME_GET_COMPONENT_LABEL:
730 case RAIDFRAME_SET_COMPONENT_LABEL:
731 case RAIDFRAME_ADD_HOT_SPARE:
732 case RAIDFRAME_REMOVE_HOT_SPARE:
733 case RAIDFRAME_INIT_LABELS:
734 case RAIDFRAME_REBUILD_IN_PLACE:
735 case RAIDFRAME_CHECK_PARITY:
736 if ((rs->sc_flags & RAIDF_INITED) == 0)
737 return (ENXIO);
738 }
739
740 switch (cmd) {
741
742
743 /* configure the system */
744 case RAIDFRAME_CONFIGURE:
745
746 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
747 /* copy-in the configuration information */
748 /* data points to a pointer to the configuration structure */
749 u_cfg = *((RF_Config_t **) data);
750 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
751 if (k_cfg == NULL) {
752 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
753 return (ENOMEM);
754 }
755 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
756 sizeof(RF_Config_t));
757 if (retcode) {
758 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
759 retcode));
760 return (retcode);
761 }
762 /* allocate a buffer for the layout-specific data, and copy it
763 * in */
764 if (k_cfg->layoutSpecificSize) {
765 if (k_cfg->layoutSpecificSize > 10000) {
766 /* sanity check */
767 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
768 return (EINVAL);
769 }
770 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
771 (u_char *));
772 if (specific_buf == NULL) {
773 RF_Free(k_cfg, sizeof(RF_Config_t));
774 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
775 return (ENOMEM);
776 }
777 retcode = copyin(k_cfg->layoutSpecific,
778 (caddr_t) specific_buf,
779 k_cfg->layoutSpecificSize);
780 if (retcode) {
781 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
782 retcode));
783 return (retcode);
784 }
785 } else
786 specific_buf = NULL;
787 k_cfg->layoutSpecific = specific_buf;
788
789 /* should do some kind of sanity check on the configuration.
790 * Store the sum of all the bytes in the last byte? */
791
792 #if 0
793 db1_printf(("Considering configuring the system.:%d 0x%x\n",
794 unit, p));
795 #endif
796
797 /* We need the pointer to this a little deeper, so stash it
798 * here... */
799
800 raidPtrs[unit]->proc = p;
801
802 /* configure the system */
803
804 raidPtrs[unit]->raidid = unit;
805
806 retcode = rf_Configure(raidPtrs[unit], k_cfg);
807
808 /* allow this many simultaneous IO's to this RAID device */
809 raidPtrs[unit]->openings = RAIDOUTSTANDING;
810
811 if (retcode == 0) {
812 retcode = raidinit(dev, raidPtrs[unit], unit);
813 rf_markalldirty( raidPtrs[unit] );
814 }
815 /* free the buffers. No return code here. */
816 if (k_cfg->layoutSpecificSize) {
817 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
818 }
819 RF_Free(k_cfg, sizeof(RF_Config_t));
820
821 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
822 retcode));
823
824 return (retcode);
825
826 /* shutdown the system */
827 case RAIDFRAME_SHUTDOWN:
828
829 if ((error = raidlock(rs)) != 0)
830 return (error);
831
832 /*
833 * If somebody has a partition mounted, we shouldn't
834 * shutdown.
835 */
836
837 part = DISKPART(dev);
838 pmask = (1 << part);
839 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
840 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
841 (rs->sc_dkdev.dk_copenmask & pmask))) {
842 raidunlock(rs);
843 return (EBUSY);
844 }
845
846 if (rf_debugKernelAccess) {
847 printf("call shutdown\n");
848 }
849 raidPtrs[unit]->proc = p; /* XXX necessary evil */
850
851 retcode = rf_Shutdown(raidPtrs[unit]);
852
853 db1_printf(("Done main shutdown\n"));
854
855 pool_destroy(&rs->sc_cbufpool);
856 db1_printf(("Done freeing component buffer freelist\n"));
857
858 /* It's no longer initialized... */
859 rs->sc_flags &= ~RAIDF_INITED;
860
861 /* Detach the disk. */
862 disk_detach(&rs->sc_dkdev);
863
864 raidunlock(rs);
865
866 return (retcode);
867 case RAIDFRAME_GET_COMPONENT_LABEL:
868 c_label_ptr = (RF_ComponentLabel_t **) data;
869 /* need to read the component label for the disk indicated
870 by row,column in component_label
871 XXX need to sanity check these values!!!
872 */
873
874 /* For practice, let's get it directly fromdisk, rather
875 than from the in-core copy */
876 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
877 (RF_ComponentLabel_t *));
878 if (component_label == NULL)
879 return (ENOMEM);
880
881 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
882
883 retcode = copyin( *c_label_ptr, component_label,
884 sizeof(RF_ComponentLabel_t));
885
886 if (retcode) {
887 return(retcode);
888 }
889
890 row = component_label->row;
891 column = component_label->column;
892
893 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
894 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
895 return(EINVAL);
896 }
897
898 raidread_component_label(
899 raidPtrs[unit]->Disks[row][column].dev,
900 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
901 component_label );
902
903 retcode = copyout((caddr_t) component_label,
904 (caddr_t) *c_label_ptr,
905 sizeof(RF_ComponentLabel_t));
906 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
907 return (retcode);
908
909 case RAIDFRAME_SET_COMPONENT_LABEL:
910 component_label = (RF_ComponentLabel_t *) data;
911
912 /* XXX check the label for valid stuff... */
913 /* Note that some things *should not* get modified --
914 the user should be re-initing the labels instead of
915 trying to patch things.
916 */
917
918 printf("Got component label:\n");
919 printf("Version: %d\n",component_label->version);
920 printf("Serial Number: %d\n",component_label->serial_number);
921 printf("Mod counter: %d\n",component_label->mod_counter);
922 printf("Row: %d\n", component_label->row);
923 printf("Column: %d\n", component_label->column);
924 printf("Num Rows: %d\n", component_label->num_rows);
925 printf("Num Columns: %d\n", component_label->num_columns);
926 printf("Clean: %d\n", component_label->clean);
927 printf("Status: %d\n", component_label->status);
928
929 row = component_label->row;
930 column = component_label->column;
931
932 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
933 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
934 return(EINVAL);
935 }
936
937 /* XXX this isn't allowed to do anything for now :-) */
938 #if 0
939 raidwrite_component_label(
940 raidPtrs[unit]->Disks[row][column].dev,
941 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
942 component_label );
943 #endif
944 return (0);
945
946 case RAIDFRAME_INIT_LABELS:
947 component_label = (RF_ComponentLabel_t *) data;
948 /*
949 we only want the serial number from
950 the above. We get all the rest of the information
951 from the config that was used to create this RAID
952 set.
953 */
954
955 raidPtrs[unit]->serial_number = component_label->serial_number;
956 /* current version number */
957 ci_label.version = RF_COMPONENT_LABEL_VERSION;
958 ci_label.serial_number = component_label->serial_number;
959 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
960 ci_label.num_rows = raidPtrs[unit]->numRow;
961 ci_label.num_columns = raidPtrs[unit]->numCol;
962 ci_label.clean = RF_RAID_DIRTY; /* not clean */
963 ci_label.status = rf_ds_optimal; /* "It's good!" */
964
965 for(row=0;row<raidPtrs[unit]->numRow;row++) {
966 ci_label.row = row;
967 for(column=0;column<raidPtrs[unit]->numCol;column++) {
968 ci_label.column = column;
969 raidwrite_component_label(
970 raidPtrs[unit]->Disks[row][column].dev,
971 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
972 &ci_label );
973 }
974 }
975
976 return (retcode);
977
978 /* initialize all parity */
979 case RAIDFRAME_REWRITEPARITY:
980
981 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
982 /* Parity for RAID 0 is trivially correct */
983 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
984 return(0);
985 }
986
987 /* borrow the thread of the requesting process */
988 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
989 s = splbio();
990 retcode = rf_RewriteParity(raidPtrs[unit]);
991 splx(s);
992 /* return I/O Error if the parity rewrite fails */
993
994 if (retcode) {
995 retcode = EIO;
996 } else {
997 /* set the clean bit! If we shutdown correctly,
998 the clean bit on each component label will get
999 set */
1000 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
1001 }
1002 return (retcode);
1003
1004
1005 case RAIDFRAME_ADD_HOT_SPARE:
1006 sparePtr = (RF_SingleComponent_t *) data;
1007 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1008 printf("Adding spare\n");
1009 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1010 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
1011 return(retcode);
1012
1013 case RAIDFRAME_REMOVE_HOT_SPARE:
1014 return(retcode);
1015
1016 case RAIDFRAME_REBUILD_IN_PLACE:
1017
1018 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1019 /* Can't do this on a RAID 0!! */
1020 return(EINVAL);
1021 }
1022
1023 componentPtr = (RF_SingleComponent_t *) data;
1024 memcpy( &component, componentPtr,
1025 sizeof(RF_SingleComponent_t));
1026 row = component.row;
1027 column = component.column;
1028 printf("Rebuild: %d %d\n",row, column);
1029 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
1030 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
1031 return(EINVAL);
1032 }
1033 printf("Attempting a rebuild in place\n");
1034 s = splbio();
1035 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1036 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1037 splx(s);
1038 return(retcode);
1039
1040 /* issue a test-unit-ready through raidframe to the indicated
1041 * device */
1042 #if 0 /* XXX not supported yet (ever?) */
1043 case RAIDFRAME_TUR:
1044 /* debug only */
1045 retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
1046 return (retcode);
1047 #endif
1048 case RAIDFRAME_GET_INFO:
1049 {
1050 RF_Raid_t *raid = raidPtrs[unit];
1051 RF_DeviceConfig_t *cfg, **ucfgp;
1052 int i, j, d;
1053
1054 if (!raid->valid)
1055 return (ENODEV);
1056 ucfgp = (RF_DeviceConfig_t **) data;
1057 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1058 (RF_DeviceConfig_t *));
1059 if (cfg == NULL)
1060 return (ENOMEM);
1061 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1062 cfg->rows = raid->numRow;
1063 cfg->cols = raid->numCol;
1064 cfg->ndevs = raid->numRow * raid->numCol;
1065 if (cfg->ndevs >= RF_MAX_DISKS) {
1066 cfg->ndevs = 0;
1067 return (ENOMEM);
1068 }
1069 cfg->nspares = raid->numSpare;
1070 if (cfg->nspares >= RF_MAX_DISKS) {
1071 cfg->nspares = 0;
1072 return (ENOMEM);
1073 }
1074 cfg->maxqdepth = raid->maxQueueDepth;
1075 d = 0;
1076 for (i = 0; i < cfg->rows; i++) {
1077 for (j = 0; j < cfg->cols; j++) {
1078 cfg->devs[d] = raid->Disks[i][j];
1079 d++;
1080 }
1081 }
1082 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1083 cfg->spares[i] = raid->Disks[0][j];
1084 }
1085 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1086 sizeof(RF_DeviceConfig_t));
1087 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1088
1089 return (retcode);
1090 }
1091 break;
1092 case RAIDFRAME_CHECK_PARITY:
1093 *(int *) data = raidPtrs[unit]->parity_good;
1094 return (0);
1095 case RAIDFRAME_RESET_ACCTOTALS:
1096 {
1097 RF_Raid_t *raid = raidPtrs[unit];
1098
1099 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1100 return (0);
1101 }
1102 break;
1103
1104 case RAIDFRAME_GET_ACCTOTALS:
1105 {
1106 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1107 RF_Raid_t *raid = raidPtrs[unit];
1108
1109 *totals = raid->acc_totals;
1110 return (0);
1111 }
1112 break;
1113
1114 case RAIDFRAME_KEEP_ACCTOTALS:
1115 {
1116 RF_Raid_t *raid = raidPtrs[unit];
1117 int *keep = (int *) data;
1118
1119 raid->keep_acc_totals = *keep;
1120 return (0);
1121 }
1122 break;
1123
1124 case RAIDFRAME_GET_SIZE:
1125 *(int *) data = raidPtrs[unit]->totalSectors;
1126 return (0);
1127
1128 #define RAIDFRAME_RECON 1
1129 /* XXX The above should probably be set somewhere else!! GO */
1130 #if RAIDFRAME_RECON > 0
1131
1132 /* fail a disk & optionally start reconstruction */
1133 case RAIDFRAME_FAIL_DISK:
1134
1135 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1136 /* Can't do this on a RAID 0!! */
1137 return(EINVAL);
1138 }
1139
1140 rr = (struct rf_recon_req *) data;
1141
1142 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1143 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1144 return (EINVAL);
1145
1146 printf("raid%d: Failing the disk: row: %d col: %d\n",
1147 unit, rr->row, rr->col);
1148
1149 /* make a copy of the recon request so that we don't rely on
1150 * the user's buffer */
1151 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1152 bcopy(rr, rrcopy, sizeof(*rr));
1153 rrcopy->raidPtr = (void *) raidPtrs[unit];
1154
1155 LOCK_RECON_Q_MUTEX();
1156 rrcopy->next = recon_queue;
1157 recon_queue = rrcopy;
1158 wakeup(&recon_queue);
1159 UNLOCK_RECON_Q_MUTEX();
1160
1161 return (0);
1162
1163 /* invoke a copyback operation after recon on whatever disk
1164 * needs it, if any */
1165 case RAIDFRAME_COPYBACK:
1166
1167 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1168 /* This makes no sense on a RAID 0!! */
1169 return(EINVAL);
1170 }
1171
1172 /* borrow the current thread to get this done */
1173 raidPtrs[unit]->proc = p; /* ICK.. but needed :-p GO */
1174 s = splbio();
1175 rf_CopybackReconstructedData(raidPtrs[unit]);
1176 splx(s);
1177 return (0);
1178
1179 /* return the percentage completion of reconstruction */
1180 case RAIDFRAME_CHECKRECON:
1181 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1182 /* This makes no sense on a RAID 0 */
1183 return(EINVAL);
1184 }
1185
1186 row = *(int *) data;
1187 if (row < 0 || row >= raidPtrs[unit]->numRow)
1188 return (EINVAL);
1189 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1190 *(int *) data = 100;
1191 else
1192 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1193 return (0);
1194
1195 /* the sparetable daemon calls this to wait for the kernel to
1196 * need a spare table. this ioctl does not return until a
1197 * spare table is needed. XXX -- calling mpsleep here in the
1198 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1199 * -- I should either compute the spare table in the kernel,
1200 * or have a different -- XXX XXX -- interface (a different
1201 * character device) for delivering the table -- XXX */
1202 #if 0
1203 case RAIDFRAME_SPARET_WAIT:
1204 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1205 while (!rf_sparet_wait_queue)
1206 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1207 waitreq = rf_sparet_wait_queue;
1208 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1209 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1210
1211 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1212
1213 RF_Free(waitreq, sizeof(*waitreq));
1214 return (0);
1215
1216
1217 /* wakes up a process waiting on SPARET_WAIT and puts an error
1218 * code in it that will cause the dameon to exit */
1219 case RAIDFRAME_ABORT_SPARET_WAIT:
1220 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1221 waitreq->fcol = -1;
1222 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1223 waitreq->next = rf_sparet_wait_queue;
1224 rf_sparet_wait_queue = waitreq;
1225 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1226 wakeup(&rf_sparet_wait_queue);
1227 return (0);
1228
1229 /* used by the spare table daemon to deliver a spare table
1230 * into the kernel */
1231 case RAIDFRAME_SEND_SPARET:
1232
1233 /* install the spare table */
1234 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1235
1236 /* respond to the requestor. the return status of the spare
1237 * table installation is passed in the "fcol" field */
1238 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1239 waitreq->fcol = retcode;
1240 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1241 waitreq->next = rf_sparet_resp_queue;
1242 rf_sparet_resp_queue = waitreq;
1243 wakeup(&rf_sparet_resp_queue);
1244 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1245
1246 return (retcode);
1247 #endif
1248
1249
1250 #endif /* RAIDFRAME_RECON > 0 */
1251
1252 default:
1253 break; /* fall through to the os-specific code below */
1254
1255 }
1256
1257 if (!raidPtrs[unit]->valid)
1258 return (EINVAL);
1259
1260 /*
1261 * Add support for "regular" device ioctls here.
1262 */
1263
1264 switch (cmd) {
1265 case DIOCGDINFO:
1266 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1267 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1268 break;
1269
1270 case DIOCGPART:
1271 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1272 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1273 ((struct partinfo *) data)->part =
1274 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1275 break;
1276
1277 case DIOCWDINFO:
1278 db1_printf(("DIOCWDINFO\n"));
1279 case DIOCSDINFO:
1280 db1_printf(("DIOCSDINFO\n"));
1281 if ((error = raidlock(rs)) != 0)
1282 return (error);
1283
1284 rs->sc_flags |= RAIDF_LABELLING;
1285
1286 error = setdisklabel(rs->sc_dkdev.dk_label,
1287 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1288 if (error == 0) {
1289 if (cmd == DIOCWDINFO)
1290 error = writedisklabel(RAIDLABELDEV(dev),
1291 raidstrategy, rs->sc_dkdev.dk_label,
1292 rs->sc_dkdev.dk_cpulabel);
1293 }
1294 rs->sc_flags &= ~RAIDF_LABELLING;
1295
1296 raidunlock(rs);
1297
1298 if (error)
1299 return (error);
1300 break;
1301
1302 case DIOCWLABEL:
1303 db1_printf(("DIOCWLABEL\n"));
1304 if (*(int *) data != 0)
1305 rs->sc_flags |= RAIDF_WLABEL;
1306 else
1307 rs->sc_flags &= ~RAIDF_WLABEL;
1308 break;
1309
1310 case DIOCGDEFLABEL:
1311 db1_printf(("DIOCGDEFLABEL\n"));
1312 raidgetdefaultlabel(raidPtrs[unit], rs,
1313 (struct disklabel *) data);
1314 break;
1315
1316 default:
1317 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1318 }
1319 return (retcode);
1320
1321 }
1322
1323
1324 /* raidinit -- complete the rest of the initialization for the
1325 RAIDframe device. */
1326
1327
1328 static int
1329 raidinit(dev, raidPtr, unit)
1330 dev_t dev;
1331 RF_Raid_t *raidPtr;
1332 int unit;
1333 {
1334 int retcode;
1335 /* int ix; */
1336 /* struct raidbuf *raidbp; */
1337 struct raid_softc *rs;
1338
1339 retcode = 0;
1340
1341 rs = &raid_softc[unit];
1342 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1343 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1344
1345
1346 /* XXX should check return code first... */
1347 rs->sc_flags |= RAIDF_INITED;
1348
1349 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1350
1351 rs->sc_dkdev.dk_name = rs->sc_xname;
1352
1353 /* disk_attach actually creates space for the CPU disklabel, among
1354 * other things, so it's critical to call this *BEFORE* we try putzing
1355 * with disklabels. */
1356
1357 disk_attach(&rs->sc_dkdev);
1358
1359 /* XXX There may be a weird interaction here between this, and
1360 * protectedSectors, as used in RAIDframe. */
1361
1362 rs->sc_size = raidPtr->totalSectors;
1363 rs->sc_dev = dev;
1364
1365 return (retcode);
1366 }
1367
1368 /*
1369 * This kernel thread never exits. It is created once, and persists
1370 * until the system reboots.
1371 */
1372
void
rf_ReconKernelThread()
{
	struct rf_recon_req *req;
	int s;

	/* XXX not sure what spl() level we should be at here... probably
	 * splbio() */
	s = splbio();

	while (1) {
		/* grab the next reconstruction request from the queue */
		LOCK_RECON_Q_MUTEX();
		while (!recon_queue) {
			/* queue is empty: release the mutex before
			 * sleeping so the enqueuer (RAIDFRAME_FAIL_DISK
			 * in raidioctl) can get in, then re-take it
			 * before re-testing the queue head */
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO,
			    "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		/* dequeue the head request */
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
		 * If flags specifies that we should start recon, this call
		 * will not return until reconstruction completes, fails,
		 * or is aborted.
		 */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

		/* the request was RF_Malloc'd by the enqueuer; we own it */
		RF_Free(req, sizeof(*req));
	}
}
1407 /* wake up the daemon & tell it to get us a spare table
1408 * XXX
1409 * the entries in the queues should be tagged with the raidPtr
1410 * so that in the extremely rare case that two recons happen at once,
1411 * we know for which device were requesting a spare table
1412 * XXX
1413 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	/* enqueue the request and wake the sparetable daemon, which
	 * blocks in the (currently #if 0'd) RAIDFRAME_SPARET_WAIT ioctl */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): the comment above describes the old mpsleep call;
	 * tsleep() does NOT release rf_sparet_wait_mutex, so this sleeps
	 * with the mutex nominally held -- confirm RF_LOCK_MUTEX is
	 * effectively a no-op here */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
#if 0
		mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
		    (void *) simple_lock_addr(rf_sparet_wait_mutex),
		    MS_LOCK_SIMPLE);
#endif
	}
	/* dequeue the daemon's response; this is a *different*
	 * RF_SparetWait_t from the one enqueued above */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* the daemon passes the installation status back in fcol */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1444 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1445 * bp & passes it down.
1446 * any calls originating in the kernel must use non-blocking I/O
1447 * do some extra sanity checking to return "appropriate" error values for
1448 * certain conditions (to make some standard utilities work)
1449 */
int
rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
	RF_Raid_t *raidPtr;
	struct buf *bp;
	RF_RaidAccessFlags_t flags;
	void (*cbFunc) (struct buf *);
	void *cbArg;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;

	/* XXX The dev_t used here should be for /dev/[r]raid* !!! */

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Ok, for the bp we have here, bp->b_blkno is relative to the
	 * partition.. Need to make it absolute to the underlying device.. */

	blocknum = bp->b_blkno;
	if (DISKPART(bp->b_dev) != RAW_PART) {
		/* add the partition's offset within the device */
		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
		blocknum += pp->p_offset;
		db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
		    pp->p_offset));
	} else {
		db1_printf(("Is raw..\n"));
	}
	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to... but DO NOT
	 * TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/* range-check the request: num_blocks is the whole-sector count,
	 * pb is 1 if there is a trailing partial sector; the (sum < ...)
	 * comparisons catch arithmetic wraparound */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	/* NOTE(review): the "1 ||" below forces the debug branch; looks
	 * like a debugging leftover -- confirm before removing */
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
		    (int) raid_addr, (int) sum, (int) num_blocks,
		    (int) pb, (int) bp->b_resid));
	}
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		/* request runs past the end of the array: fail the buf */
		bp->b_error = ENOSPC;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* reject transfers that are not a whole number of sectors */
	if (bp->b_bcount & raidPtr->sectorMask) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	db1_printf(("Calling DoAccess..\n"));


	/* Put a throttle on the number of requests we handle simultanously */

	RF_LOCK_MUTEX(raidPtr->mutex);

	/* sleep until an opening frees up; completion of another request
	 * wakes us via &raidPtr->openings */
	while (raidPtr->openings <= 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		(void) tsleep(&raidPtr->openings, PRIBIO, "rfdwait", 0);
		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	raidPtr->openings--;

	RF_UNLOCK_MUTEX(raidPtr->mutex);

	/*
	 * Everything is async.
	 */
	do_async = 1;

	/* don't ever condition on bp->b_flags & B_WRITE. always condition on
	 * B_READ instead */
	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
	    do_async, raid_addr, num_blocks,
	    bp->b_un.b_addr,
	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
	    NULL, cbFunc, cbArg);
#if 0
	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
	    bp->b_data, (int) bp->b_resid));
#endif

	return (retcode);
}
1556 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1557
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	/* translate RAIDframe's I/O type into a buf-layer flag */
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* wrap the request in a raidbuf from the per-unit pool */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* no physical I/O for a NOP: complete immediately */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			/* start timing the physical I/O */
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* set up the component-level buf; KernelWakeupFunc will
		 * run at biodone time with req as its context */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
			    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
		    req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
		    (int) req->sectorOffset, (int) req->numSector,
		    (int) (req->numSector <<
			queue->raidPtr->logBytesPerSector),
		    (int) queue->raidPtr->logBytesPerSector));
		/* writes must bump the vnode's pending-output counter */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		/* hand the buf to the component's driver */
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
1670 /* this is the callback function associated with a I/O invoked from
1671 kernel code.
1672 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* vbp is really the raidbuf allocated in rf_DispatchKernelIO;
	 * rf_buf is its first member, so this cast recovers it */
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	/* the original buf this component I/O was issued on behalf of */
	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate a component-level error to the original buf */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	if (req->tracerec) {
		/* stop the physical-I/O timer started in
		 * rf_DispatchKernelIO and fold the elapsed time into
		 * the access statistics */
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* return the raidbuf wrapper to the per-unit pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
		    unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* notify the disk queue and the DAG engine; second argument is
	 * the failure status (1 = failed) */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
1771
1772
1773
1774 /*
1775 * initialize a buf structure for doing an I/O in the kernel.
1776 */
1777 static void
1778 InitBP(
1779 struct buf * bp,
1780 struct vnode * b_vp,
1781 unsigned rw_flag,
1782 dev_t dev,
1783 RF_SectorNum_t startSect,
1784 RF_SectorCount_t numSect,
1785 caddr_t buf,
1786 void (*cbFunc) (struct buf *),
1787 void *cbArg,
1788 int logBytesPerSector,
1789 struct proc * b_proc)
1790 {
1791 /* bp->b_flags = B_PHYS | rw_flag; */
1792 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1793 bp->b_bcount = numSect << logBytesPerSector;
1794 bp->b_bufsize = bp->b_bcount;
1795 bp->b_error = 0;
1796 bp->b_dev = dev;
1797 db1_printf(("bp->b_dev is %d\n", dev));
1798 bp->b_un.b_addr = buf;
1799 #if 0
1800 db1_printf(("bp->b_data=0x%x\n", bp->b_data));
1801 #endif
1802
1803 bp->b_blkno = startSect;
1804 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1805 db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
1806 if (bp->b_bcount == 0) {
1807 panic("bp->b_bcount is zero in InitBP!!\n");
1808 }
1809 bp->b_proc = b_proc;
1810 bp->b_iodone = cbFunc;
1811 bp->b_vp = b_vp;
1812
1813 }
1814 /* Extras... */
1815
#if 0
/*
 * Disabled placeholder version of rf_GetSpareTableFromDaemon; the live
 * implementation appears earlier in this file.  Kept only under #if 0.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode = 1;
	printf("This is supposed to do something useful!!\n");	/* XXX */

	return (retcode);

}
#endif
1828
1829 static void
1830 raidgetdefaultlabel(raidPtr, rs, lp)
1831 RF_Raid_t *raidPtr;
1832 struct raid_softc *rs;
1833 struct disklabel *lp;
1834 {
1835 db1_printf(("Building a default label...\n"));
1836 bzero(lp, sizeof(*lp));
1837
1838 /* fabricate a label... */
1839 lp->d_secperunit = raidPtr->totalSectors;
1840 lp->d_secsize = raidPtr->bytesPerSector;
1841 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1842 lp->d_ntracks = 1;
1843 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1844 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1845
1846 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1847 lp->d_type = DTYPE_RAID;
1848 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1849 lp->d_rpm = 3600;
1850 lp->d_interleave = 1;
1851 lp->d_flags = 0;
1852
1853 lp->d_partitions[RAW_PART].p_offset = 0;
1854 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1855 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1856 lp->d_npartitions = RAW_PART + 1;
1857
1858 lp->d_magic = DISKMAGIC;
1859 lp->d_magic2 = DISKMAGIC;
1860 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1861
1862 }
1863 /*
1864 * Read the disklabel from the raid device. If one is not present, fake one
1865 * up.
1866 */
static void
raidgetdisklabel(dev)
	dev_t dev;
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a fabricated default label; readdisklabel() below
	 * overwrites it if a real one is found on the device */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* no on-disk label: finish off the default one */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same componets are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
1920 /*
1921 * Take care of things one might want to take care of in the event
1922 * that a disklabel isn't present.
1923 */
1924 static void
1925 raidmakedisklabel(rs)
1926 struct raid_softc *rs;
1927 {
1928 struct disklabel *lp = rs->sc_dkdev.dk_label;
1929 db1_printf(("Making a label..\n"));
1930
1931 /*
1932 * For historical reasons, if there's no disklabel present
1933 * the raw partition must be marked FS_BSDFFS.
1934 */
1935
1936 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1937
1938 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1939
1940 lp->d_checksum = dkcksum(lp);
1941 }
1942 /*
1943 * Lookup the provided name in the filesystem. If the file exists,
1944 * is a valid block device, and isn't being used by anyone else,
1945 * set *vpp to the file's vnode.
1946 * You'll find the original of this in ccd.c
1947 */
1948 int
1949 raidlookup(path, p, vpp)
1950 char *path;
1951 struct proc *p;
1952 struct vnode **vpp; /* result */
1953 {
1954 struct nameidata nd;
1955 struct vnode *vp;
1956 struct vattr va;
1957 int error;
1958
1959 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1960 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1961 #ifdef DEBUG
1962 printf("RAIDframe: vn_open returned %d\n", error);
1963 #endif
1964 return (error);
1965 }
1966 vp = nd.ni_vp;
1967 if (vp->v_usecount > 1) {
1968 VOP_UNLOCK(vp, 0);
1969 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1970 return (EBUSY);
1971 }
1972 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1973 VOP_UNLOCK(vp, 0);
1974 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1975 return (error);
1976 }
1977 /* XXX: eventually we should handle VREG, too. */
1978 if (va.va_type != VBLK) {
1979 VOP_UNLOCK(vp, 0);
1980 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1981 return (ENOTBLK);
1982 }
1983 VOP_UNLOCK(vp, 0);
1984 *vpp = vp;
1985 return (0);
1986 }
1987 /*
1988 * Wait interruptibly for an exclusive lock.
1989 *
1990 * XXX
1991 * Several drivers do this; it should be abstracted and made MP-safe.
1992 * (Hmm... where have we seen this warning before :-> GO )
1993 */
1994 static int
1995 raidlock(rs)
1996 struct raid_softc *rs;
1997 {
1998 int error;
1999
2000 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2001 rs->sc_flags |= RAIDF_WANTED;
2002 if ((error =
2003 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2004 return (error);
2005 }
2006 rs->sc_flags |= RAIDF_LOCKED;
2007 return (0);
2008 }
2009 /*
2010 * Unlock and wake up any waiters.
2011 */
2012 static void
2013 raidunlock(rs)
2014 struct raid_softc *rs;
2015 {
2016
2017 rs->sc_flags &= ~RAIDF_LOCKED;
2018 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2019 rs->sc_flags &= ~RAIDF_WANTED;
2020 wakeup(rs);
2021 }
2022 }
2023
2024
2025 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2026 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2027
2028 int
2029 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2030 {
2031 RF_ComponentLabel_t component_label;
2032 raidread_component_label(dev, b_vp, &component_label);
2033 component_label.mod_counter = mod_counter;
2034 component_label.clean = RF_RAID_CLEAN;
2035 raidwrite_component_label(dev, b_vp, &component_label);
2036 return(0);
2037 }
2038
2039
2040 int
2041 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2042 {
2043 RF_ComponentLabel_t component_label;
2044 raidread_component_label(dev, b_vp, &component_label);
2045 component_label.mod_counter = mod_counter;
2046 component_label.clean = RF_RAID_DIRTY;
2047 raidwrite_component_label(dev, b_vp, &component_label);
2048 return(0);
2049 }
2050
2051 /* ARGSUSED */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* issue the read synchronously through the component's block
	 * device and wait for it to finish */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		/* NOTE: on error *component_label is left untouched,
		 * possibly uninitialized -- callers must check the
		 * return value before using it */
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* scratch buffer: mark it invalid so it won't be cached */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
/* ARGSUSED */
/*
 * raidwrite_component_label:
 *
 *	Synchronously write *component_label to the component-label area
 *	(RF_COMPONENT_INFO_OFFSET) of the component identified by dev.
 *	A scratch buffer of RF_COMPONENT_INFO_SIZE bytes is zeroed first,
 *	so everything in the on-disk label area beyond
 *	sizeof(RF_ComponentLabel_t) is cleared as well.
 *
 *	b_vp is unused (hence ARGSUSED); it is kept so the signature
 *	mirrors raidread_component_label().
 *
 *	Returns 0 on success, or the error code from biowait().
 */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero the full label area, then copy the label proper over it */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* hand the buffer to the block device and wait for completion */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* invalidate the buffer so stale label data is never reused */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2135
2136 void
2137 rf_markalldirty( raidPtr )
2138 RF_Raid_t *raidPtr;
2139 {
2140 RF_ComponentLabel_t c_label;
2141 int r,c;
2142
2143 raidPtr->mod_counter++;
2144 for (r = 0; r < raidPtr->numRow; r++) {
2145 for (c = 0; c < raidPtr->numCol; c++) {
2146 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2147 raidread_component_label(
2148 raidPtr->Disks[r][c].dev,
2149 raidPtr->raid_cinfo[r][c].ci_vp,
2150 &c_label);
2151 if (c_label.status == rf_ds_spared) {
2152 /* XXX do something special...
2153 but whatever you do, don't
2154 try to access it!! */
2155 } else {
2156 #if 0
2157 c_label.status =
2158 raidPtr->Disks[r][c].status;
2159 raidwrite_component_label(
2160 raidPtr->Disks[r][c].dev,
2161 raidPtr->raid_cinfo[r][c].ci_vp,
2162 &c_label);
2163 #endif
2164 raidmarkdirty(
2165 raidPtr->Disks[r][c].dev,
2166 raidPtr->raid_cinfo[r][c].ci_vp,
2167 raidPtr->mod_counter);
2168 }
2169 }
2170 }
2171 }
2172 /* printf("Component labels marked dirty.\n"); */
2173 #if 0
2174 for( c = 0; c < raidPtr->numSpare ; c++) {
2175 sparecol = raidPtr->numCol + c;
2176 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2177 /*
2178
2179 XXX this is where we get fancy and map this spare
2180 into it's correct spot in the array.
2181
2182 */
2183 /*
2184
2185 we claim this disk is "optimal" if it's
2186 rf_ds_used_spare, as that means it should be
2187 directly substitutable for the disk it replaced.
2188 We note that too...
2189
2190 */
2191
2192 for(i=0;i<raidPtr->numRow;i++) {
2193 for(j=0;j<raidPtr->numCol;j++) {
2194 if ((raidPtr->Disks[i][j].spareRow ==
2195 r) &&
2196 (raidPtr->Disks[i][j].spareCol ==
2197 sparecol)) {
2198 srow = r;
2199 scol = sparecol;
2200 break;
2201 }
2202 }
2203 }
2204
2205 raidread_component_label(
2206 raidPtr->Disks[r][sparecol].dev,
2207 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2208 &c_label);
2209 /* make sure status is noted */
2210 c_label.version = RF_COMPONENT_LABEL_VERSION;
2211 c_label.mod_counter = raidPtr->mod_counter;
2212 c_label.serial_number = raidPtr->serial_number;
2213 c_label.row = srow;
2214 c_label.column = scol;
2215 c_label.num_rows = raidPtr->numRow;
2216 c_label.num_columns = raidPtr->numCol;
2217 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2218 c_label.status = rf_ds_optimal;
2219 raidwrite_component_label(
2220 raidPtr->Disks[r][sparecol].dev,
2221 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2222 &c_label);
2223 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2224 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2225 }
2226 }
2227
2228 #endif
2229 }
2230
2231
/*
 * rf_update_component_labels:
 *
 *	Bump the modification counter and re-write the component label of
 *	every optimal component, then rebuild the labels of any in-use
 *	spares so they describe the row/column they stand in for.  When
 *	parity is known clean (parity_good == RF_RAID_CLEAN), the labels
 *	are also marked clean via raidmarkclean().
 *
 *	NOTE(review): the return values of raidread_component_label() are
 *	ignored throughout; on a read error c_label is uninitialized
 *	stack garbage before the fields below are patched -- confirm
 *	whether a read failure should skip the component instead.
 */
void
rf_update_component_labels( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	/* sentinels for the spare-mapping search below.
	   NOTE(review): if no Disks[i][j] entry points at a spare, these
	   -1 values are written into the spare's label row/column. */
	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				/* read-modify-write: refresh only the
				   status field of the existing label */
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status = rf_ds_optimal;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
						raidPtr->Disks[r][c].dev,
						raidPtr->raid_cinfo[r][c].ci_vp,
						raidPtr->mod_counter);
				}
			}
			/* else we don't touch it.. */
#if 0
			else if (raidPtr->Disks[r][c].status !=
				 rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
						raidPtr->Disks[r][c].dev,
						raidPtr->raid_cinfo[r][c].ci_vp,
						raidPtr->mod_counter);
				}
			}
#endif
		}
	}

	/* NOTE(review): spares are addressed as Disks[0][sparecol] and
	   matched against spareRow == 0 -- this hard-codes row 0 and
	   looks wrong for multi-row sets; confirm intent. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the array position this spare stands in
			   for; the break only exits the inner loop, but
			   spare mappings are expected to be unique */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}

			raidread_component_label(
				raidPtr->Disks[0][sparecol].dev,
				raidPtr->raid_cinfo[0][sparecol].ci_vp,
				&c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				raidPtr->Disks[0][sparecol].dev,
				raidPtr->raid_cinfo[0][sparecol].ci_vp,
				&c_label);
			if (raidPtr->parity_good == RF_RAID_CLEAN) {
				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
					       raidPtr->raid_cinfo[0][sparecol].ci_vp,
					       raidPtr->mod_counter);
			}
		}
	}
	/* printf("Component labels updated\n"); */
}
2348