rf_netbsdkintf.c revision 1.21 1 /* $NetBSD: rf_netbsdkintf.c,v 1.21 1999/07/21 03:15:26 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
/*
 * Threshold consulted by the db_printf()/dbN_printf() macros when DEBUG
 * is defined.
 */
int rf_kdebug_level = 0;

/*
 * Values held in rf_kbooted.  raidattach() stores RFK_BOOT_GOOD once
 * rf_BootRaidframe() has succeeded, and raidstrategy() tests for it.
 */
#define RFK_BOOT_NONE	0
#define RFK_BOOT_GOOD	1
#define RFK_BOOT_BAD	2

static int rf_kbooted = RFK_BOOT_NONE;
160
/*
 * Debug output helpers.
 *
 * Every macro takes a single argument: a complete printf() argument list
 * wrapped in its own parentheses, e.g.
 *
 *	db1_printf(("unit %d\n", unit));
 *
 * db0_printf() always prints.  With DEBUG defined, db_printf() and
 * db1_printf() print when rf_kdebug_level > 0, db2_printf() when it is
 * > 1, and so on up to db5_printf() (> 4).  Without DEBUG those macros
 * expand to an empty statement and their argument is not evaluated.
 *
 * The conditional and empty forms are wrapped in do { } while (0) so each
 * use is exactly one statement: a bare "if (...) printf a" would capture
 * a following "else", and a bare "{ }" followed by ";" would break an
 * enclosing if/else.  db_printf() is provided in both configurations.
 */
#ifdef DEBUG
#define db0_printf(a) printf a
#define db_printf(a)  do { if (rf_kdebug_level > 0) printf a; } while (0)
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#define db2_printf(a) do { if (rf_kdebug_level > 1) printf a; } while (0)
#define db3_printf(a) do { if (rf_kdebug_level > 2) printf a; } while (0)
#define db4_printf(a) do { if (rf_kdebug_level > 3) printf a; } while (0)
#define db5_printf(a) do { if (rf_kdebug_level > 4) printf a; } while (0)
#else				/* DEBUG */
#define db0_printf(a) printf a
#define db_printf(a)  do { } while (0)
#define db1_printf(a) do { } while (0)
#define db2_printf(a) do { } while (0)
#define db3_printf(a) do { } while (0)
#define db4_printf(a) do { } while (0)
#define db5_printf(a) do { } while (0)
#endif				/* DEBUG */
177
178 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
179
180 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
181
182 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
183 * spare table */
184 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
185 * installation process */
186
187 static struct rf_recon_req *recon_queue = NULL; /* used to communicate
188 * reconstruction
189 * requests */
190
191
192 decl_simple_lock_data(, recon_queue_mutex)
193 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
194 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
195
196 /* prototypes */
197 static void KernelWakeupFunc(struct buf * bp);
198 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
199 dev_t dev, RF_SectorNum_t startSect,
200 RF_SectorCount_t numSect, caddr_t buf,
201 void (*cbFunc) (struct buf *), void *cbArg,
202 int logBytesPerSector, struct proc * b_proc);
203
/*
 * DprintfN(fmt, a1..aN) -- when rf_queueDebug is non-zero, call
 * rf_debug_printf() with the format, the N supplied arguments and NULL in
 * every remaining argument slot.
 *
 * Each expansion is wrapped in do { } while (0) so that it is a single
 * statement; the bare "if (rf_queueDebug) ..." form would steal the
 * "else" of an enclosing if statement.
 */
#define Dprintf0(s) \
	do { \
		if (rf_queueDebug) \
			rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL); \
	} while (0)
#define Dprintf1(s,a) \
	do { \
		if (rf_queueDebug) \
			rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL); \
	} while (0)
#define Dprintf2(s,a,b) \
	do { \
		if (rf_queueDebug) \
			rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL); \
	} while (0)
#define Dprintf3(s,a,b,c) \
	do { \
		if (rf_queueDebug) \
			rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL); \
	} while (0)
212
213 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
214 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
215
216 void raidattach __P((int));
217 int raidsize __P((dev_t));
218
219 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
220 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
221 static int raidinit __P((dev_t, RF_Raid_t *, int));
222
223 int raidopen __P((dev_t, int, int, struct proc *));
224 int raidclose __P((dev_t, int, int, struct proc *));
225 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
226 int raidwrite __P((dev_t, struct uio *, int));
227 int raidread __P((dev_t, struct uio *, int));
228 void raidstrategy __P((struct buf *));
229 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
230
231 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
232 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
233 void rf_update_component_labels( RF_Raid_t *);
234 /*
235 * Pilfered from ccd.c
236 */
237
238 struct raidbuf {
239 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
240 struct buf *rf_obp; /* ptr. to original I/O buf */
241 int rf_flags; /* misc. flags */
242 RF_DiskQueueData_t *req;/* the request that this was part of.. */
243 };
244
245
246 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
247 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
248
249 /* XXX Not sure if the following should be replacing the raidPtrs above,
250 or if it should be used in conjunction with that... */
251
252 struct raid_softc {
253 int sc_flags; /* flags */
254 int sc_cflags; /* configuration flags */
255 size_t sc_size; /* size of the raid device */
256 dev_t sc_dev; /* our device.. */
257 char sc_xname[20]; /* XXX external name */
258 struct disk sc_dkdev; /* generic disk device info */
259 struct pool sc_cbufpool; /* component buffer pool */
260 };
261 /* sc_flags */
262 #define RAIDF_INITED 0x01 /* unit has been initialized */
263 #define RAIDF_WLABEL 0x02 /* label area is writable */
264 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
265 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
266 #define RAIDF_LOCKED 0x80 /* unit is locked */
267
268 #define raidunit(x) DISKUNIT(x)
269 static int numraid = 0;
270
271 /*
272 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
273 * Be aware that large numbers can allow the driver to consume a lot of
274 * kernel memory, especially on writes...
275 */
276
277 #ifndef RAIDOUTSTANDING
278 #define RAIDOUTSTANDING 10
279 #endif
280
281 #define RAIDLABELDEV(dev) \
282 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
283
284 /* declared here, and made public, for the benefit of KVM stuff.. */
285 struct raid_softc *raid_softc;
286
287 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
288 struct disklabel *));
289 static void raidgetdisklabel __P((dev_t));
290 static void raidmakedisklabel __P((struct raid_softc *));
291
292 static int raidlock __P((struct raid_softc *));
293 static void raidunlock __P((struct raid_softc *));
294 int raidlookup __P((char *, struct proc * p, struct vnode **));
295
296 static void rf_markalldirty __P((RF_Raid_t *));
297
298 void
299 raidattach(num)
300 int num;
301 {
302 int raidID;
303 int i, rc;
304
305 #ifdef DEBUG
306 printf("raidattach: Asked for %d units\n", num);
307 #endif
308
309 if (num <= 0) {
310 #ifdef DIAGNOSTIC
311 panic("raidattach: count <= 0");
312 #endif
313 return;
314 }
315 /* This is where all the initialization stuff gets done. */
316
317 /* Make some space for requested number of units... */
318
319 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
320 if (raidPtrs == NULL) {
321 panic("raidPtrs is NULL!!\n");
322 }
323
324 rc = rf_mutex_init(&rf_sparet_wait_mutex);
325 if (rc) {
326 RF_PANIC();
327 }
328
329 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
330 recon_queue = NULL;
331
332 for (i = 0; i < numraid; i++)
333 raidPtrs[i] = NULL;
334 rc = rf_BootRaidframe();
335 if (rc == 0)
336 printf("Kernelized RAIDframe activated\n");
337 else
338 panic("Serious error booting RAID!!\n");
339
340 rf_kbooted = RFK_BOOT_GOOD;
341
342 /* put together some datastructures like the CCD device does.. This
343 * lets us lock the device and what-not when it gets opened. */
344
345 raid_softc = (struct raid_softc *)
346 malloc(num * sizeof(struct raid_softc),
347 M_RAIDFRAME, M_NOWAIT);
348 if (raid_softc == NULL) {
349 printf("WARNING: no memory for RAIDframe driver\n");
350 return;
351 }
352 numraid = num;
353 bzero(raid_softc, num * sizeof(struct raid_softc));
354
355 for (raidID = 0; raidID < num; raidID++) {
356 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
357 (RF_Raid_t *));
358 if (raidPtrs[raidID] == NULL) {
359 printf("raidPtrs[%d] is NULL\n", raidID);
360 }
361 }
362 }
363
364
365 int
366 raidsize(dev)
367 dev_t dev;
368 {
369 struct raid_softc *rs;
370 struct disklabel *lp;
371 int part, unit, omask, size;
372
373 unit = raidunit(dev);
374 if (unit >= numraid)
375 return (-1);
376 rs = &raid_softc[unit];
377
378 if ((rs->sc_flags & RAIDF_INITED) == 0)
379 return (-1);
380
381 part = DISKPART(dev);
382 omask = rs->sc_dkdev.dk_openmask & (1 << part);
383 lp = rs->sc_dkdev.dk_label;
384
385 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
386 return (-1);
387
388 if (lp->d_partitions[part].p_fstype != FS_SWAP)
389 size = -1;
390 else
391 size = lp->d_partitions[part].p_size *
392 (lp->d_secsize / DEV_BSIZE);
393
394 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
395 return (-1);
396
397 return (size);
398
399 }
400
401 int
402 raiddump(dev, blkno, va, size)
403 dev_t dev;
404 daddr_t blkno;
405 caddr_t va;
406 size_t size;
407 {
408 /* Not implemented. */
409 return ENXIO;
410 }
411 /* ARGSUSED */
412 int
413 raidopen(dev, flags, fmt, p)
414 dev_t dev;
415 int flags, fmt;
416 struct proc *p;
417 {
418 int unit = raidunit(dev);
419 struct raid_softc *rs;
420 struct disklabel *lp;
421 int part, pmask;
422 int error = 0;
423
424 if (unit >= numraid)
425 return (ENXIO);
426 rs = &raid_softc[unit];
427
428 if ((error = raidlock(rs)) != 0)
429 return (error);
430 lp = rs->sc_dkdev.dk_label;
431
432 part = DISKPART(dev);
433 pmask = (1 << part);
434
435 db1_printf(("Opening raid device number: %d partition: %d\n",
436 unit, part));
437
438
439 if ((rs->sc_flags & RAIDF_INITED) &&
440 (rs->sc_dkdev.dk_openmask == 0))
441 raidgetdisklabel(dev);
442
443 /* make sure that this partition exists */
444
445 if (part != RAW_PART) {
446 db1_printf(("Not a raw partition..\n"));
447 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
448 ((part >= lp->d_npartitions) ||
449 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
450 error = ENXIO;
451 raidunlock(rs);
452 db1_printf(("Bailing out...\n"));
453 return (error);
454 }
455 }
456 /* Prevent this unit from being unconfigured while open. */
457 switch (fmt) {
458 case S_IFCHR:
459 rs->sc_dkdev.dk_copenmask |= pmask;
460 break;
461
462 case S_IFBLK:
463 rs->sc_dkdev.dk_bopenmask |= pmask;
464 break;
465 }
466
467 if ((rs->sc_dkdev.dk_openmask == 0) &&
468 ((rs->sc_flags & RAIDF_INITED) != 0)) {
469 /* First one... mark things as dirty... Note that we *MUST*
470 have done a configure before this. I DO NOT WANT TO BE
471 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
472 THAT THEY BELONG TOGETHER!!!!! */
473 /* XXX should check to see if we're only open for reading
474 here... If so, we needn't do this, but then need some
475 other way of keeping track of what's happened.. */
476
477 rf_markalldirty( raidPtrs[unit] );
478 }
479
480
481 rs->sc_dkdev.dk_openmask =
482 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
483
484 raidunlock(rs);
485
486 return (error);
487
488
489 }
490 /* ARGSUSED */
491 int
492 raidclose(dev, flags, fmt, p)
493 dev_t dev;
494 int flags, fmt;
495 struct proc *p;
496 {
497 int unit = raidunit(dev);
498 struct raid_softc *rs;
499 int error = 0;
500 int part;
501
502 if (unit >= numraid)
503 return (ENXIO);
504 rs = &raid_softc[unit];
505
506 if ((error = raidlock(rs)) != 0)
507 return (error);
508
509 part = DISKPART(dev);
510
511 /* ...that much closer to allowing unconfiguration... */
512 switch (fmt) {
513 case S_IFCHR:
514 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
515 break;
516
517 case S_IFBLK:
518 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
519 break;
520 }
521 rs->sc_dkdev.dk_openmask =
522 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
523
524 if ((rs->sc_dkdev.dk_openmask == 0) &&
525 ((rs->sc_flags & RAIDF_INITED) != 0)) {
526 /* Last one... device is not unconfigured yet.
527 Device shutdown has taken care of setting the
528 clean bits if RAIDF_INITED is not set
529 mark things as clean... */
530 rf_update_component_labels( raidPtrs[unit] );
531 }
532
533 raidunlock(rs);
534 return (0);
535
536 }
537
538 void
539 raidstrategy(bp)
540 register struct buf *bp;
541 {
542 register int s;
543
544 unsigned int raidID = raidunit(bp->b_dev);
545 RF_Raid_t *raidPtr;
546 struct raid_softc *rs = &raid_softc[raidID];
547 struct disklabel *lp;
548 int wlabel;
549
550 #if 0
551 db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
552 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
553 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
554 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
555 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
556
557 if (bp->b_flags & B_READ)
558 db1_printf(("READ\n"));
559 else
560 db1_printf(("WRITE\n"));
561 #endif
562 if (rf_kbooted != RFK_BOOT_GOOD)
563 return;
564 if (raidID >= numraid || !raidPtrs[raidID]) {
565 bp->b_error = ENODEV;
566 bp->b_flags |= B_ERROR;
567 bp->b_resid = bp->b_bcount;
568 biodone(bp);
569 return;
570 }
571 raidPtr = raidPtrs[raidID];
572 if (!raidPtr->valid) {
573 bp->b_error = ENODEV;
574 bp->b_flags |= B_ERROR;
575 bp->b_resid = bp->b_bcount;
576 biodone(bp);
577 return;
578 }
579 if (bp->b_bcount == 0) {
580 db1_printf(("b_bcount is zero..\n"));
581 biodone(bp);
582 return;
583 }
584 lp = rs->sc_dkdev.dk_label;
585
586 /*
587 * Do bounds checking and adjust transfer. If there's an
588 * error, the bounds check will flag that for us.
589 */
590
591 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
592 if (DISKPART(bp->b_dev) != RAW_PART)
593 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
594 db1_printf(("Bounds check failed!!:%d %d\n",
595 (int) bp->b_blkno, (int) wlabel));
596 biodone(bp);
597 return;
598 }
599 s = splbio(); /* XXX Needed? */
600 db1_printf(("Beginning strategy...\n"));
601
602 bp->b_resid = 0;
603 bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
604 NULL, NULL, NULL);
605 if (bp->b_error) {
606 bp->b_flags |= B_ERROR;
607 db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
608 bp->b_error));
609 }
610 splx(s);
611 #if 0
612 db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
613 bp, bp->b_data,
614 (int) bp->b_bcount, (int) bp->b_resid));
615 #endif
616 }
617 /* ARGSUSED */
618 int
619 raidread(dev, uio, flags)
620 dev_t dev;
621 struct uio *uio;
622 int flags;
623 {
624 int unit = raidunit(dev);
625 struct raid_softc *rs;
626 int part;
627
628 if (unit >= numraid)
629 return (ENXIO);
630 rs = &raid_softc[unit];
631
632 if ((rs->sc_flags & RAIDF_INITED) == 0)
633 return (ENXIO);
634 part = DISKPART(dev);
635
636 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
637
638 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
639
640 }
641 /* ARGSUSED */
642 int
643 raidwrite(dev, uio, flags)
644 dev_t dev;
645 struct uio *uio;
646 int flags;
647 {
648 int unit = raidunit(dev);
649 struct raid_softc *rs;
650
651 if (unit >= numraid)
652 return (ENXIO);
653 rs = &raid_softc[unit];
654
655 if ((rs->sc_flags & RAIDF_INITED) == 0)
656 return (ENXIO);
657 db1_printf(("raidwrite\n"));
658 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
659
660 }
661
662 int
663 raidioctl(dev, cmd, data, flag, p)
664 dev_t dev;
665 u_long cmd;
666 caddr_t data;
667 int flag;
668 struct proc *p;
669 {
670 int unit = raidunit(dev);
671 int error = 0;
672 int part, pmask;
673 struct raid_softc *rs;
674 #if 0
675 int r, c;
676 #endif
677 /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
678
679 /* struct ccdbuf *cbp; */
680 /* struct raidbuf *raidbp; */
681 RF_Config_t *k_cfg, *u_cfg;
682 u_char *specific_buf;
683 int retcode = 0;
684 int row;
685 int column;
686 int s;
687 struct rf_recon_req *rrcopy, *rr;
688 RF_ComponentLabel_t *component_label;
689 RF_ComponentLabel_t ci_label;
690 RF_ComponentLabel_t **c_label_ptr;
691 RF_SingleComponent_t *sparePtr,*componentPtr;
692 RF_SingleComponent_t hot_spare;
693 RF_SingleComponent_t component;
694
695 if (unit >= numraid)
696 return (ENXIO);
697 rs = &raid_softc[unit];
698
699 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
700 (int) DISKPART(dev), (int) unit, (int) cmd));
701
702 /* Must be open for writes for these commands... */
703 switch (cmd) {
704 case DIOCSDINFO:
705 case DIOCWDINFO:
706 case DIOCWLABEL:
707 if ((flag & FWRITE) == 0)
708 return (EBADF);
709 }
710
711 /* Must be initialized for these... */
712 switch (cmd) {
713 case DIOCGDINFO:
714 case DIOCSDINFO:
715 case DIOCWDINFO:
716 case DIOCGPART:
717 case DIOCWLABEL:
718 case DIOCGDEFLABEL:
719 case RAIDFRAME_SHUTDOWN:
720 case RAIDFRAME_REWRITEPARITY:
721 case RAIDFRAME_GET_INFO:
722 case RAIDFRAME_RESET_ACCTOTALS:
723 case RAIDFRAME_GET_ACCTOTALS:
724 case RAIDFRAME_KEEP_ACCTOTALS:
725 case RAIDFRAME_GET_SIZE:
726 case RAIDFRAME_FAIL_DISK:
727 case RAIDFRAME_COPYBACK:
728 case RAIDFRAME_CHECKRECON:
729 case RAIDFRAME_GET_COMPONENT_LABEL:
730 case RAIDFRAME_SET_COMPONENT_LABEL:
731 case RAIDFRAME_ADD_HOT_SPARE:
732 case RAIDFRAME_REMOVE_HOT_SPARE:
733 case RAIDFRAME_INIT_LABELS:
734 case RAIDFRAME_REBUILD_IN_PLACE:
735 if ((rs->sc_flags & RAIDF_INITED) == 0)
736 return (ENXIO);
737 }
738
739 switch (cmd) {
740
741
742 /* configure the system */
743 case RAIDFRAME_CONFIGURE:
744
745 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
746 /* copy-in the configuration information */
747 /* data points to a pointer to the configuration structure */
748 u_cfg = *((RF_Config_t **) data);
749 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
750 if (k_cfg == NULL) {
751 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
752 return (ENOMEM);
753 }
754 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
755 sizeof(RF_Config_t));
756 if (retcode) {
757 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
758 retcode));
759 return (retcode);
760 }
761 /* allocate a buffer for the layout-specific data, and copy it
762 * in */
763 if (k_cfg->layoutSpecificSize) {
764 if (k_cfg->layoutSpecificSize > 10000) {
765 /* sanity check */
766 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
767 return (EINVAL);
768 }
769 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
770 (u_char *));
771 if (specific_buf == NULL) {
772 RF_Free(k_cfg, sizeof(RF_Config_t));
773 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
774 return (ENOMEM);
775 }
776 retcode = copyin(k_cfg->layoutSpecific,
777 (caddr_t) specific_buf,
778 k_cfg->layoutSpecificSize);
779 if (retcode) {
780 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
781 retcode));
782 return (retcode);
783 }
784 } else
785 specific_buf = NULL;
786 k_cfg->layoutSpecific = specific_buf;
787
788 /* should do some kind of sanity check on the configuration.
789 * Store the sum of all the bytes in the last byte? */
790
791 #if 0
792 db1_printf(("Considering configuring the system.:%d 0x%x\n",
793 unit, p));
794 #endif
795
796 /* We need the pointer to this a little deeper, so stash it
797 * here... */
798
799 raidPtrs[unit]->proc = p;
800
801 /* configure the system */
802
803 raidPtrs[unit]->raidid = unit;
804
805 retcode = rf_Configure(raidPtrs[unit], k_cfg);
806
807 /* allow this many simultaneous IO's to this RAID device */
808 raidPtrs[unit]->openings = RAIDOUTSTANDING;
809
810 if (retcode == 0) {
811 retcode = raidinit(dev, raidPtrs[unit], unit);
812 rf_markalldirty( raidPtrs[unit] );
813 }
814 /* free the buffers. No return code here. */
815 if (k_cfg->layoutSpecificSize) {
816 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
817 }
818 RF_Free(k_cfg, sizeof(RF_Config_t));
819
820 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
821 retcode));
822
823 return (retcode);
824
825 /* shutdown the system */
826 case RAIDFRAME_SHUTDOWN:
827
828 if ((error = raidlock(rs)) != 0)
829 return (error);
830
831 /*
832 * If somebody has a partition mounted, we shouldn't
833 * shutdown.
834 */
835
836 part = DISKPART(dev);
837 pmask = (1 << part);
838 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
839 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
840 (rs->sc_dkdev.dk_copenmask & pmask))) {
841 raidunlock(rs);
842 return (EBUSY);
843 }
844
845 if (rf_debugKernelAccess) {
846 printf("call shutdown\n");
847 }
848 raidPtrs[unit]->proc = p; /* XXX necessary evil */
849
850 retcode = rf_Shutdown(raidPtrs[unit]);
851
852 db1_printf(("Done main shutdown\n"));
853
854 pool_destroy(&rs->sc_cbufpool);
855 db1_printf(("Done freeing component buffer freelist\n"));
856
857 /* It's no longer initialized... */
858 rs->sc_flags &= ~RAIDF_INITED;
859
860 /* Detach the disk. */
861 disk_detach(&rs->sc_dkdev);
862
863 raidunlock(rs);
864
865 return (retcode);
866 case RAIDFRAME_GET_COMPONENT_LABEL:
867 c_label_ptr = (RF_ComponentLabel_t **) data;
868 /* need to read the component label for the disk indicated
869 by row,column in component_label
870 XXX need to sanity check these values!!!
871 */
872
873 /* For practice, let's get it directly fromdisk, rather
874 than from the in-core copy */
875 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
876 (RF_ComponentLabel_t *));
877 if (component_label == NULL)
878 return (ENOMEM);
879
880 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
881
882 retcode = copyin( *c_label_ptr, component_label,
883 sizeof(RF_ComponentLabel_t));
884
885 if (retcode) {
886 return(retcode);
887 }
888
889 row = component_label->row;
890 printf("Row: %d\n",row);
891 if (row > raidPtrs[unit]->numRow) {
892 row = 0; /* XXX */
893 }
894 column = component_label->column;
895 printf("Column: %d\n",column);
896 if (column > raidPtrs[unit]->numCol) {
897 column = 0; /* XXX */
898 }
899
900 raidread_component_label(
901 raidPtrs[unit]->Disks[row][column].dev,
902 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
903 component_label );
904
905 retcode = copyout((caddr_t) component_label,
906 (caddr_t) *c_label_ptr,
907 sizeof(RF_ComponentLabel_t));
908 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
909 return (retcode);
910
911 case RAIDFRAME_SET_COMPONENT_LABEL:
912 component_label = (RF_ComponentLabel_t *) data;
913
914 /* XXX check the label for valid stuff... */
915 /* Note that some things *should not* get modified --
916 the user should be re-initing the labels instead of
917 trying to patch things.
918 */
919
920 printf("Got component label:\n");
921 printf("Version: %d\n",component_label->version);
922 printf("Serial Number: %d\n",component_label->serial_number);
923 printf("Mod counter: %d\n",component_label->mod_counter);
924 printf("Row: %d\n", component_label->row);
925 printf("Column: %d\n", component_label->column);
926 printf("Num Rows: %d\n", component_label->num_rows);
927 printf("Num Columns: %d\n", component_label->num_columns);
928 printf("Clean: %d\n", component_label->clean);
929 printf("Status: %d\n", component_label->status);
930
931 row = component_label->row;
932 column = component_label->column;
933
934 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
935 (column < 0) || (column > raidPtrs[unit]->numCol)) {
936 return(EINVAL);
937 }
938
939 /* XXX this isn't allowed to do anything for now :-) */
940 #if 0
941 raidwrite_component_label(
942 raidPtrs[unit]->Disks[row][column].dev,
943 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
944 component_label );
945 #endif
946 return (0);
947
948 case RAIDFRAME_INIT_LABELS:
949 component_label = (RF_ComponentLabel_t *) data;
950 /*
951 we only want the serial number from
952 the above. We get all the rest of the information
953 from the config that was used to create this RAID
954 set.
955 */
956
957 raidPtrs[unit]->serial_number = component_label->serial_number;
958 /* current version number */
959 ci_label.version = RF_COMPONENT_LABEL_VERSION;
960 ci_label.serial_number = component_label->serial_number;
961 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
962 ci_label.num_rows = raidPtrs[unit]->numRow;
963 ci_label.num_columns = raidPtrs[unit]->numCol;
964 ci_label.clean = RF_RAID_DIRTY; /* not clean */
965 ci_label.status = rf_ds_optimal; /* "It's good!" */
966
967 for(row=0;row<raidPtrs[unit]->numRow;row++) {
968 ci_label.row = row;
969 for(column=0;column<raidPtrs[unit]->numCol;column++) {
970 ci_label.column = column;
971 raidwrite_component_label(
972 raidPtrs[unit]->Disks[row][column].dev,
973 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
974 &ci_label );
975 }
976 }
977
978 return (retcode);
979
980 /* initialize all parity */
981 case RAIDFRAME_REWRITEPARITY:
982
983 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
984 /* Parity for RAID 0 is trivially correct */
985 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
986 return(0);
987 }
988
989 /* borrow the thread of the requesting process */
990 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
991 retcode = rf_RewriteParity(raidPtrs[unit]);
992 /* return I/O Error if the parity rewrite fails */
993
994 if (retcode) {
995 retcode = EIO;
996 } else {
997 /* set the clean bit! If we shutdown correctly,
998 the clean bit on each component label will get
999 set */
1000 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
1001 }
1002 return (retcode);
1003
1004
1005 case RAIDFRAME_ADD_HOT_SPARE:
1006 sparePtr = (RF_SingleComponent_t *) data;
1007 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1008 printf("Adding spare\n");
1009 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1010 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
1011 return(retcode);
1012
1013 case RAIDFRAME_REMOVE_HOT_SPARE:
1014 return(retcode);
1015
1016 case RAIDFRAME_REBUILD_IN_PLACE:
1017 componentPtr = (RF_SingleComponent_t *) data;
1018 memcpy( &component, componentPtr,
1019 sizeof(RF_SingleComponent_t));
1020 row = component.row;
1021 column = component.column;
1022 printf("Rebuild: %d %d\n",row, column);
1023 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
1024 (column < 0) || (column > raidPtrs[unit]->numCol)) {
1025 return(EINVAL);
1026 }
1027 printf("Attempting a rebuild in place\n");
1028 s = splbio();
1029 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1030 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1031 splx(s);
1032 return(retcode);
1033
1034 /* issue a test-unit-ready through raidframe to the indicated
1035 * device */
1036 #if 0 /* XXX not supported yet (ever?) */
1037 case RAIDFRAME_TUR:
1038 /* debug only */
1039 retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
1040 return (retcode);
1041 #endif
1042 case RAIDFRAME_GET_INFO:
1043 {
1044 RF_Raid_t *raid = raidPtrs[unit];
1045 RF_DeviceConfig_t *cfg, **ucfgp;
1046 int i, j, d;
1047
1048 if (!raid->valid)
1049 return (ENODEV);
1050 ucfgp = (RF_DeviceConfig_t **) data;
1051 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1052 (RF_DeviceConfig_t *));
1053 if (cfg == NULL)
1054 return (ENOMEM);
1055 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1056 cfg->rows = raid->numRow;
1057 cfg->cols = raid->numCol;
1058 cfg->ndevs = raid->numRow * raid->numCol;
1059 if (cfg->ndevs >= RF_MAX_DISKS) {
1060 cfg->ndevs = 0;
1061 return (ENOMEM);
1062 }
1063 cfg->nspares = raid->numSpare;
1064 if (cfg->nspares >= RF_MAX_DISKS) {
1065 cfg->nspares = 0;
1066 return (ENOMEM);
1067 }
1068 cfg->maxqdepth = raid->maxQueueDepth;
1069 d = 0;
1070 for (i = 0; i < cfg->rows; i++) {
1071 for (j = 0; j < cfg->cols; j++) {
1072 cfg->devs[d] = raid->Disks[i][j];
1073 d++;
1074 }
1075 }
1076 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1077 cfg->spares[i] = raid->Disks[0][j];
1078 }
1079 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1080 sizeof(RF_DeviceConfig_t));
1081 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1082
1083 return (retcode);
1084 }
1085 break;
1086
1087 case RAIDFRAME_RESET_ACCTOTALS:
1088 {
1089 RF_Raid_t *raid = raidPtrs[unit];
1090
1091 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1092 return (0);
1093 }
1094 break;
1095
1096 case RAIDFRAME_GET_ACCTOTALS:
1097 {
1098 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1099 RF_Raid_t *raid = raidPtrs[unit];
1100
1101 *totals = raid->acc_totals;
1102 return (0);
1103 }
1104 break;
1105
1106 case RAIDFRAME_KEEP_ACCTOTALS:
1107 {
1108 RF_Raid_t *raid = raidPtrs[unit];
1109 int *keep = (int *) data;
1110
1111 raid->keep_acc_totals = *keep;
1112 return (0);
1113 }
1114 break;
1115
1116 case RAIDFRAME_GET_SIZE:
1117 *(int *) data = raidPtrs[unit]->totalSectors;
1118 return (0);
1119
1120 #define RAIDFRAME_RECON 1
1121 /* XXX The above should probably be set somewhere else!! GO */
1122 #if RAIDFRAME_RECON > 0
1123
1124 /* fail a disk & optionally start reconstruction */
1125 case RAIDFRAME_FAIL_DISK:
1126 rr = (struct rf_recon_req *) data;
1127
1128 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1129 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1130 return (EINVAL);
1131
1132 printf("raid%d: Failing the disk: row: %d col: %d\n",
1133 unit, rr->row, rr->col);
1134
1135 /* make a copy of the recon request so that we don't rely on
1136 * the user's buffer */
1137 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1138 bcopy(rr, rrcopy, sizeof(*rr));
1139 rrcopy->raidPtr = (void *) raidPtrs[unit];
1140
1141 LOCK_RECON_Q_MUTEX();
1142 rrcopy->next = recon_queue;
1143 recon_queue = rrcopy;
1144 wakeup(&recon_queue);
1145 UNLOCK_RECON_Q_MUTEX();
1146
1147 return (0);
1148
1149 /* invoke a copyback operation after recon on whatever disk
1150 * needs it, if any */
1151 case RAIDFRAME_COPYBACK:
1152 /* borrow the current thread to get this done */
1153 raidPtrs[unit]->proc = p; /* ICK.. but needed :-p GO */
1154 s = splbio();
1155 rf_CopybackReconstructedData(raidPtrs[unit]);
1156 splx(s);
1157 return (0);
1158
1159 /* return the percentage completion of reconstruction */
1160 case RAIDFRAME_CHECKRECON:
1161 row = *(int *) data;
1162 if (row < 0 || row >= raidPtrs[unit]->numRow)
1163 return (EINVAL);
1164 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1165 *(int *) data = 100;
1166 else
1167 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1168 return (0);
1169
1170 /* the sparetable daemon calls this to wait for the kernel to
1171 * need a spare table. this ioctl does not return until a
1172 * spare table is needed. XXX -- calling mpsleep here in the
1173 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1174 * -- I should either compute the spare table in the kernel,
1175 * or have a different -- XXX XXX -- interface (a different
1176 * character device) for delivering the table -- XXX */
1177 #if 0
1178 case RAIDFRAME_SPARET_WAIT:
1179 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1180 while (!rf_sparet_wait_queue)
1181 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1182 waitreq = rf_sparet_wait_queue;
1183 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1184 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1185
1186 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1187
1188 RF_Free(waitreq, sizeof(*waitreq));
1189 return (0);
1190
1191
1192 /* wakes up a process waiting on SPARET_WAIT and puts an error
1193 * code in it that will cause the dameon to exit */
1194 case RAIDFRAME_ABORT_SPARET_WAIT:
1195 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1196 waitreq->fcol = -1;
1197 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1198 waitreq->next = rf_sparet_wait_queue;
1199 rf_sparet_wait_queue = waitreq;
1200 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1201 wakeup(&rf_sparet_wait_queue);
1202 return (0);
1203
1204 /* used by the spare table daemon to deliver a spare table
1205 * into the kernel */
1206 case RAIDFRAME_SEND_SPARET:
1207
1208 /* install the spare table */
1209 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1210
1211 /* respond to the requestor. the return status of the spare
1212 * table installation is passed in the "fcol" field */
1213 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1214 waitreq->fcol = retcode;
1215 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1216 waitreq->next = rf_sparet_resp_queue;
1217 rf_sparet_resp_queue = waitreq;
1218 wakeup(&rf_sparet_resp_queue);
1219 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1220
1221 return (retcode);
1222 #endif
1223
1224
1225 #endif /* RAIDFRAME_RECON > 0 */
1226
1227 default:
1228 break; /* fall through to the os-specific code below */
1229
1230 }
1231
1232 if (!raidPtrs[unit]->valid)
1233 return (EINVAL);
1234
1235 /*
1236 * Add support for "regular" device ioctls here.
1237 */
1238
1239 switch (cmd) {
1240 case DIOCGDINFO:
1241 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1242 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1243 break;
1244
1245 case DIOCGPART:
1246 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1247 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1248 ((struct partinfo *) data)->part =
1249 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1250 break;
1251
1252 case DIOCWDINFO:
1253 db1_printf(("DIOCWDINFO\n"));
1254 case DIOCSDINFO:
1255 db1_printf(("DIOCSDINFO\n"));
1256 if ((error = raidlock(rs)) != 0)
1257 return (error);
1258
1259 rs->sc_flags |= RAIDF_LABELLING;
1260
1261 error = setdisklabel(rs->sc_dkdev.dk_label,
1262 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1263 if (error == 0) {
1264 if (cmd == DIOCWDINFO)
1265 error = writedisklabel(RAIDLABELDEV(dev),
1266 raidstrategy, rs->sc_dkdev.dk_label,
1267 rs->sc_dkdev.dk_cpulabel);
1268 }
1269 rs->sc_flags &= ~RAIDF_LABELLING;
1270
1271 raidunlock(rs);
1272
1273 if (error)
1274 return (error);
1275 break;
1276
1277 case DIOCWLABEL:
1278 db1_printf(("DIOCWLABEL\n"));
1279 if (*(int *) data != 0)
1280 rs->sc_flags |= RAIDF_WLABEL;
1281 else
1282 rs->sc_flags &= ~RAIDF_WLABEL;
1283 break;
1284
1285 case DIOCGDEFLABEL:
1286 db1_printf(("DIOCGDEFLABEL\n"));
1287 raidgetdefaultlabel(raidPtrs[unit], rs,
1288 (struct disklabel *) data);
1289 break;
1290
1291 default:
1292 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1293 }
1294 return (retcode);
1295
1296 }
1297
1298
1299 /* raidinit -- complete the rest of the initialization for the
1300 RAIDframe device. */
1301
1302
1303 static int
1304 raidinit(dev, raidPtr, unit)
1305 dev_t dev;
1306 RF_Raid_t *raidPtr;
1307 int unit;
1308 {
1309 int retcode;
1310 /* int ix; */
1311 /* struct raidbuf *raidbp; */
1312 struct raid_softc *rs;
1313
1314 retcode = 0;
1315
1316 rs = &raid_softc[unit];
1317 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1318 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1319
1320
1321 /* XXX should check return code first... */
1322 rs->sc_flags |= RAIDF_INITED;
1323
1324 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1325
1326 rs->sc_dkdev.dk_name = rs->sc_xname;
1327
1328 /* disk_attach actually creates space for the CPU disklabel, among
1329 * other things, so it's critical to call this *BEFORE* we try putzing
1330 * with disklabels. */
1331
1332 disk_attach(&rs->sc_dkdev);
1333
1334 /* XXX There may be a weird interaction here between this, and
1335 * protectedSectors, as used in RAIDframe. */
1336
1337 rs->sc_size = raidPtr->totalSectors;
1338 rs->sc_dev = dev;
1339
1340 return (retcode);
1341 }
1342
1343 /*
1344 * This kernel thread never exits. It is created once, and persists
1345 * until the system reboots.
1346 */
1347
1348 void
1349 rf_ReconKernelThread()
1350 {
1351 struct rf_recon_req *req;
1352 int s;
1353
1354 /* XXX not sure what spl() level we should be at here... probably
1355 * splbio() */
1356 s = splbio();
1357
1358 while (1) {
1359 /* grab the next reconstruction request from the queue */
1360 LOCK_RECON_Q_MUTEX();
1361 while (!recon_queue) {
1362 UNLOCK_RECON_Q_MUTEX();
1363 tsleep(&recon_queue, PRIBIO,
1364 "raidframe recon", 0);
1365 LOCK_RECON_Q_MUTEX();
1366 }
1367 req = recon_queue;
1368 recon_queue = recon_queue->next;
1369 UNLOCK_RECON_Q_MUTEX();
1370
1371 /*
1372 * If flags specifies that we should start recon, this call
1373 * will not return until reconstruction completes, fails,
1374 * or is aborted.
1375 */
1376 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
1377 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
1378
1379 RF_Free(req, sizeof(*req));
1380 }
1381 }
1382 /* wake up the daemon & tell it to get us a spare table
1383 * XXX
1384 * the entries in the queues should be tagged with the raidPtr
1385 * so that in the extremely rare case that two recons happen at once,
1386 * we know for which device were requesting a spare table
1387 * XXX
1388 */
1389 int
1390 rf_GetSpareTableFromDaemon(req)
1391 RF_SparetWait_t *req;
1392 {
1393 int retcode;
1394
1395 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1396 req->next = rf_sparet_wait_queue;
1397 rf_sparet_wait_queue = req;
1398 wakeup(&rf_sparet_wait_queue);
1399
1400 /* mpsleep unlocks the mutex */
1401 while (!rf_sparet_resp_queue) {
1402 tsleep(&rf_sparet_resp_queue, PRIBIO,
1403 "raidframe getsparetable", 0);
1404 #if 0
1405 mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
1406 (void *) simple_lock_addr(rf_sparet_wait_mutex),
1407 MS_LOCK_SIMPLE);
1408 #endif
1409 }
1410 req = rf_sparet_resp_queue;
1411 rf_sparet_resp_queue = req->next;
1412 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1413
1414 retcode = req->fcol;
1415 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1416 * alloc'd */
1417 return (retcode);
1418 }
1419 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1420 * bp & passes it down.
1421 * any calls originating in the kernel must use non-blocking I/O
1422 * do some extra sanity checking to return "appropriate" error values for
1423 * certain conditions (to make some standard utilities work)
1424 */
1425 int
1426 rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
1427 RF_Raid_t *raidPtr;
1428 struct buf *bp;
1429 RF_RaidAccessFlags_t flags;
1430 void (*cbFunc) (struct buf *);
1431 void *cbArg;
1432 {
1433 RF_SectorCount_t num_blocks, pb, sum;
1434 RF_RaidAddr_t raid_addr;
1435 int retcode;
1436 struct partition *pp;
1437 daddr_t blocknum;
1438 int unit;
1439 struct raid_softc *rs;
1440 int do_async;
1441
1442 /* XXX The dev_t used here should be for /dev/[r]raid* !!! */
1443
1444 unit = raidPtr->raidid;
1445 rs = &raid_softc[unit];
1446
1447 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1448 * partition.. Need to make it absolute to the underlying device.. */
1449
1450 blocknum = bp->b_blkno;
1451 if (DISKPART(bp->b_dev) != RAW_PART) {
1452 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1453 blocknum += pp->p_offset;
1454 db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
1455 pp->p_offset));
1456 } else {
1457 db1_printf(("Is raw..\n"));
1458 }
1459 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));
1460
1461 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1462 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1463
1464 /* *THIS* is where we adjust what block we're going to... but DO NOT
1465 * TOUCH bp->b_blkno!!! */
1466 raid_addr = blocknum;
1467
1468 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1469 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1470 sum = raid_addr + num_blocks + pb;
1471 if (1 || rf_debugKernelAccess) {
1472 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1473 (int) raid_addr, (int) sum, (int) num_blocks,
1474 (int) pb, (int) bp->b_resid));
1475 }
1476 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1477 || (sum < num_blocks) || (sum < pb)) {
1478 bp->b_error = ENOSPC;
1479 bp->b_flags |= B_ERROR;
1480 bp->b_resid = bp->b_bcount;
1481 biodone(bp);
1482 return (bp->b_error);
1483 }
1484 /*
1485 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1486 */
1487
1488 if (bp->b_bcount & raidPtr->sectorMask) {
1489 bp->b_error = EINVAL;
1490 bp->b_flags |= B_ERROR;
1491 bp->b_resid = bp->b_bcount;
1492 biodone(bp);
1493 return (bp->b_error);
1494 }
1495 db1_printf(("Calling DoAccess..\n"));
1496
1497
1498 /* Put a throttle on the number of requests we handle simultanously */
1499
1500 RF_LOCK_MUTEX(raidPtr->mutex);
1501
1502 while(raidPtr->openings <= 0) {
1503 RF_UNLOCK_MUTEX(raidPtr->mutex);
1504 (void)tsleep(&raidPtr->openings, PRIBIO, "rfdwait", 0);
1505 RF_LOCK_MUTEX(raidPtr->mutex);
1506 }
1507 raidPtr->openings--;
1508
1509 RF_UNLOCK_MUTEX(raidPtr->mutex);
1510
1511 /*
1512 * Everything is async.
1513 */
1514 do_async = 1;
1515
1516 /* don't ever condition on bp->b_flags & B_WRITE. always condition on
1517 * B_READ instead */
1518 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1519 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1520 do_async, raid_addr, num_blocks,
1521 bp->b_un.b_addr,
1522 bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
1523 NULL, cbFunc, cbArg);
1524 #if 0
1525 db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
1526 bp->b_data, (int) bp->b_resid));
1527 #endif
1528
1529 return (retcode);
1530 }
1531 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1532
1533 int
1534 rf_DispatchKernelIO(queue, req)
1535 RF_DiskQueue_t *queue;
1536 RF_DiskQueueData_t *req;
1537 {
1538 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1539 struct buf *bp;
1540 struct raidbuf *raidbp = NULL;
1541 struct raid_softc *rs;
1542 int unit;
1543
1544 /* XXX along with the vnode, we also need the softc associated with
1545 * this device.. */
1546
1547 req->queue = queue;
1548
1549 unit = queue->raidPtr->raidid;
1550
1551 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1552
1553 if (unit >= numraid) {
1554 printf("Invalid unit number: %d %d\n", unit, numraid);
1555 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1556 }
1557 rs = &raid_softc[unit];
1558
1559 /* XXX is this the right place? */
1560 disk_busy(&rs->sc_dkdev);
1561
1562 bp = req->bp;
1563 #if 1
1564 /* XXX when there is a physical disk failure, someone is passing us a
1565 * buffer that contains old stuff!! Attempt to deal with this problem
1566 * without taking a performance hit... (not sure where the real bug
1567 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1568
1569 if (bp->b_flags & B_ERROR) {
1570 bp->b_flags &= ~B_ERROR;
1571 }
1572 if (bp->b_error != 0) {
1573 bp->b_error = 0;
1574 }
1575 #endif
1576 raidbp = RAIDGETBUF(rs);
1577
1578 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1579
1580 /*
1581 * context for raidiodone
1582 */
1583 raidbp->rf_obp = bp;
1584 raidbp->req = req;
1585
1586 switch (req->type) {
1587 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1588 /* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
1589 * queue->row, queue->col); */
1590 /* XXX need to do something extra here.. */
1591 /* I'm leaving this in, as I've never actually seen it used,
1592 * and I'd like folks to report it... GO */
1593 printf(("WAKEUP CALLED\n"));
1594 queue->numOutstanding++;
1595
1596 /* XXX need to glue the original buffer into this?? */
1597
1598 KernelWakeupFunc(&raidbp->rf_buf);
1599 break;
1600
1601 case RF_IO_TYPE_READ:
1602 case RF_IO_TYPE_WRITE:
1603
1604 if (req->tracerec) {
1605 RF_ETIMER_START(req->tracerec->timer);
1606 }
1607 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1608 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1609 req->sectorOffset, req->numSector,
1610 req->buf, KernelWakeupFunc, (void *) req,
1611 queue->raidPtr->logBytesPerSector, req->b_proc);
1612
1613 if (rf_debugKernelAccess) {
1614 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1615 (long) bp->b_blkno));
1616 }
1617 queue->numOutstanding++;
1618 queue->last_deq_sector = req->sectorOffset;
1619 /* acc wouldn't have been let in if there were any pending
1620 * reqs at any other priority */
1621 queue->curPriority = req->priority;
1622 /* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
1623 * req->type, queue->row, queue->col); */
1624
1625 db1_printf(("Going for %c to unit %d row %d col %d\n",
1626 req->type, unit, queue->row, queue->col));
1627 db1_printf(("sector %d count %d (%d bytes) %d\n",
1628 (int) req->sectorOffset, (int) req->numSector,
1629 (int) (req->numSector <<
1630 queue->raidPtr->logBytesPerSector),
1631 (int) queue->raidPtr->logBytesPerSector));
1632 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1633 raidbp->rf_buf.b_vp->v_numoutput++;
1634 }
1635 VOP_STRATEGY(&raidbp->rf_buf);
1636
1637 break;
1638
1639 default:
1640 panic("bad req->type in rf_DispatchKernelIO");
1641 }
1642 db1_printf(("Exiting from DispatchKernelIO\n"));
1643 return (0);
1644 }
1645 /* this is the callback function associated with a I/O invoked from
1646 kernel code.
1647 */
1648 static void
1649 KernelWakeupFunc(vbp)
1650 struct buf *vbp;
1651 {
1652 RF_DiskQueueData_t *req = NULL;
1653 RF_DiskQueue_t *queue;
1654 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1655 struct buf *bp;
1656 struct raid_softc *rs;
1657 int unit;
1658 register int s;
1659
1660 s = splbio(); /* XXX */
1661 db1_printf(("recovering the request queue:\n"));
1662 req = raidbp->req;
1663
1664 bp = raidbp->rf_obp;
1665 #if 0
1666 db1_printf(("bp=0x%x\n", bp));
1667 #endif
1668
1669 queue = (RF_DiskQueue_t *) req->queue;
1670
1671 if (raidbp->rf_buf.b_flags & B_ERROR) {
1672 #if 0
1673 printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
1674 #endif
1675 bp->b_flags |= B_ERROR;
1676 bp->b_error = raidbp->rf_buf.b_error ?
1677 raidbp->rf_buf.b_error : EIO;
1678 }
1679 #if 0
1680 db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
1681 db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
1682 db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
1683 db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
1684 #endif
1685
1686 /* XXX methinks this could be wrong... */
1687 #if 1
1688 bp->b_resid = raidbp->rf_buf.b_resid;
1689 #endif
1690
1691 if (req->tracerec) {
1692 RF_ETIMER_STOP(req->tracerec->timer);
1693 RF_ETIMER_EVAL(req->tracerec->timer);
1694 RF_LOCK_MUTEX(rf_tracing_mutex);
1695 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1696 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1697 req->tracerec->num_phys_ios++;
1698 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1699 }
1700 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1701
1702 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1703
1704
1705 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1706 * ballistic, and mark the component as hosed... */
1707 #if 1
1708 if (bp->b_flags & B_ERROR) {
1709 /* Mark the disk as dead */
1710 /* but only mark it once... */
1711 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1712 rf_ds_optimal) {
1713 printf("raid%d: IO Error. Marking %s as failed.\n",
1714 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1715 queue->raidPtr->Disks[queue->row][queue->col].status =
1716 rf_ds_failed;
1717 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1718 queue->raidPtr->numFailures++;
1719 /* XXX here we should bump the version number for each component, and write that data out */
1720 } else { /* Disk is already dead... */
1721 /* printf("Disk already marked as dead!\n"); */
1722 }
1723
1724 }
1725 #endif
1726
1727 rs = &raid_softc[unit];
1728 RAIDPUTBUF(rs, raidbp);
1729
1730
1731 if (bp->b_resid == 0) {
1732 db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
1733 unit, bp->b_resid, bp->b_bcount));
1734 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1735 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1736 } else {
1737 db1_printf(("b_resid is still %ld\n", bp->b_resid));
1738 }
1739
1740 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1741 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1742 /* printf("Exiting KernelWakeupFunc\n"); */
1743
1744 splx(s); /* XXX */
1745 }
1746
1747
1748
1749 /*
1750 * initialize a buf structure for doing an I/O in the kernel.
1751 */
1752 static void
1753 InitBP(
1754 struct buf * bp,
1755 struct vnode * b_vp,
1756 unsigned rw_flag,
1757 dev_t dev,
1758 RF_SectorNum_t startSect,
1759 RF_SectorCount_t numSect,
1760 caddr_t buf,
1761 void (*cbFunc) (struct buf *),
1762 void *cbArg,
1763 int logBytesPerSector,
1764 struct proc * b_proc)
1765 {
1766 /* bp->b_flags = B_PHYS | rw_flag; */
1767 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1768 bp->b_bcount = numSect << logBytesPerSector;
1769 bp->b_bufsize = bp->b_bcount;
1770 bp->b_error = 0;
1771 bp->b_dev = dev;
1772 db1_printf(("bp->b_dev is %d\n", dev));
1773 bp->b_un.b_addr = buf;
1774 #if 0
1775 db1_printf(("bp->b_data=0x%x\n", bp->b_data));
1776 #endif
1777
1778 bp->b_blkno = startSect;
1779 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1780 db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
1781 if (bp->b_bcount == 0) {
1782 panic("bp->b_bcount is zero in InitBP!!\n");
1783 }
1784 bp->b_proc = b_proc;
1785 bp->b_iodone = cbFunc;
1786 bp->b_vp = b_vp;
1787
1788 }
/* Extras... */

/*
 * rpcc -- placeholder that always returns 0.
 *
 * XXX no clue what this is supposed to do.. my guess is that it's
 * supposed to read the CPU cycle counter...
 */
unsigned int
rpcc()
{
	unsigned int cycles = 0;

	return (cycles);
}
#if 0
/*
 * Disabled placeholder for rf_GetSpareTableFromDaemon(): prints a
 * message and returns 1 without looking at req.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	printf("This is supposed to do something useful!!\n");	/* XXX */
	return (1);
}
#endif
1811
1812 static void
1813 raidgetdefaultlabel(raidPtr, rs, lp)
1814 RF_Raid_t *raidPtr;
1815 struct raid_softc *rs;
1816 struct disklabel *lp;
1817 {
1818 db1_printf(("Building a default label...\n"));
1819 bzero(lp, sizeof(*lp));
1820
1821 /* fabricate a label... */
1822 lp->d_secperunit = raidPtr->totalSectors;
1823 lp->d_secsize = raidPtr->bytesPerSector;
1824 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1825 lp->d_ntracks = 1;
1826 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1827 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1828
1829 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1830 lp->d_type = DTYPE_RAID;
1831 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1832 lp->d_rpm = 3600;
1833 lp->d_interleave = 1;
1834 lp->d_flags = 0;
1835
1836 lp->d_partitions[RAW_PART].p_offset = 0;
1837 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1838 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1839 lp->d_npartitions = RAW_PART + 1;
1840
1841 lp->d_magic = DISKMAGIC;
1842 lp->d_magic2 = DISKMAGIC;
1843 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1844
1845 }
1846 /*
1847 * Read the disklabel from the raid device. If one is not present, fake one
1848 * up.
1849 */
1850 static void
1851 raidgetdisklabel(dev)
1852 dev_t dev;
1853 {
1854 int unit = raidunit(dev);
1855 struct raid_softc *rs = &raid_softc[unit];
1856 char *errstring;
1857 struct disklabel *lp = rs->sc_dkdev.dk_label;
1858 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1859 RF_Raid_t *raidPtr;
1860
1861 db1_printf(("Getting the disklabel...\n"));
1862
1863 bzero(clp, sizeof(*clp));
1864
1865 raidPtr = raidPtrs[unit];
1866
1867 raidgetdefaultlabel(raidPtr, rs, lp);
1868
1869 /*
1870 * Call the generic disklabel extraction routine.
1871 */
1872 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1873 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1874 if (errstring)
1875 raidmakedisklabel(rs);
1876 else {
1877 int i;
1878 struct partition *pp;
1879
1880 /*
1881 * Sanity check whether the found disklabel is valid.
1882 *
1883 * This is necessary since total size of the raid device
1884 * may vary when an interleave is changed even though exactly
1885 * same componets are used, and old disklabel may used
1886 * if that is found.
1887 */
1888 if (lp->d_secperunit != rs->sc_size)
1889 printf("WARNING: %s: "
1890 "total sector size in disklabel (%d) != "
1891 "the size of raid (%ld)\n", rs->sc_xname,
1892 lp->d_secperunit, (long) rs->sc_size);
1893 for (i = 0; i < lp->d_npartitions; i++) {
1894 pp = &lp->d_partitions[i];
1895 if (pp->p_offset + pp->p_size > rs->sc_size)
1896 printf("WARNING: %s: end of partition `%c' "
1897 "exceeds the size of raid (%ld)\n",
1898 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1899 }
1900 }
1901
1902 }
1903 /*
1904 * Take care of things one might want to take care of in the event
1905 * that a disklabel isn't present.
1906 */
1907 static void
1908 raidmakedisklabel(rs)
1909 struct raid_softc *rs;
1910 {
1911 struct disklabel *lp = rs->sc_dkdev.dk_label;
1912 db1_printf(("Making a label..\n"));
1913
1914 /*
1915 * For historical reasons, if there's no disklabel present
1916 * the raw partition must be marked FS_BSDFFS.
1917 */
1918
1919 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1920
1921 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1922
1923 lp->d_checksum = dkcksum(lp);
1924 }
1925 /*
1926 * Lookup the provided name in the filesystem. If the file exists,
1927 * is a valid block device, and isn't being used by anyone else,
1928 * set *vpp to the file's vnode.
1929 * You'll find the original of this in ccd.c
1930 */
1931 int
1932 raidlookup(path, p, vpp)
1933 char *path;
1934 struct proc *p;
1935 struct vnode **vpp; /* result */
1936 {
1937 struct nameidata nd;
1938 struct vnode *vp;
1939 struct vattr va;
1940 int error;
1941
1942 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1943 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1944 #ifdef DEBUG
1945 printf("RAIDframe: vn_open returned %d\n", error);
1946 #endif
1947 return (error);
1948 }
1949 vp = nd.ni_vp;
1950 if (vp->v_usecount > 1) {
1951 VOP_UNLOCK(vp, 0);
1952 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1953 return (EBUSY);
1954 }
1955 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1956 VOP_UNLOCK(vp, 0);
1957 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1958 return (error);
1959 }
1960 /* XXX: eventually we should handle VREG, too. */
1961 if (va.va_type != VBLK) {
1962 VOP_UNLOCK(vp, 0);
1963 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1964 return (ENOTBLK);
1965 }
1966 VOP_UNLOCK(vp, 0);
1967 *vpp = vp;
1968 return (0);
1969 }
1970 /*
1971 * Wait interruptibly for an exclusive lock.
1972 *
1973 * XXX
1974 * Several drivers do this; it should be abstracted and made MP-safe.
1975 * (Hmm... where have we seen this warning before :-> GO )
1976 */
1977 static int
1978 raidlock(rs)
1979 struct raid_softc *rs;
1980 {
1981 int error;
1982
1983 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1984 rs->sc_flags |= RAIDF_WANTED;
1985 if ((error =
1986 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1987 return (error);
1988 }
1989 rs->sc_flags |= RAIDF_LOCKED;
1990 return (0);
1991 }
1992 /*
1993 * Unlock and wake up any waiters.
1994 */
1995 static void
1996 raidunlock(rs)
1997 struct raid_softc *rs;
1998 {
1999
2000 rs->sc_flags &= ~RAIDF_LOCKED;
2001 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2002 rs->sc_flags &= ~RAIDF_WANTED;
2003 wakeup(rs);
2004 }
2005 }
2006
2007
/*
 * Location and size of the component label area.  The read/write
 * routines in this file divide the offset by DEV_BSIZE to get the
 * block number, and use the size for the buffer and transfer length.
 */
#define RF_COMPONENT_INFO_OFFSET	(16 * 1024)	/* bytes */
#define RF_COMPONENT_INFO_SIZE		(1 * 1024)	/* bytes */
2010
2011 int
2012 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2013 {
2014 RF_ComponentLabel_t component_label;
2015 raidread_component_label(dev, b_vp, &component_label);
2016 component_label.mod_counter = mod_counter;
2017 component_label.clean = RF_RAID_CLEAN;
2018 raidwrite_component_label(dev, b_vp, &component_label);
2019 return(0);
2020 }
2021
2022
2023 int
2024 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2025 {
2026 RF_ComponentLabel_t component_label;
2027 raidread_component_label(dev, b_vp, &component_label);
2028 component_label.mod_counter = mod_counter;
2029 component_label.clean = RF_RAID_DIRTY;
2030 raidwrite_component_label(dev, b_vp, &component_label);
2031 return(0);
2032 }
2033
2034 /* ARGSUSED */
2035 int
2036 raidread_component_label(dev, b_vp, component_label)
2037 dev_t dev;
2038 struct vnode *b_vp;
2039 RF_ComponentLabel_t *component_label;
2040 {
2041 struct buf *bp;
2042 int error;
2043
2044 /* XXX should probably ensure that we don't try to do this if
2045 someone has changed rf_protected_sectors. */
2046
2047 /* get a block of the appropriate size... */
2048 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2049 bp->b_dev = dev;
2050
2051 /* get our ducks in a row for the read */
2052 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2053 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2054 bp->b_flags = B_BUSY | B_READ;
2055 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2056
2057 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2058
2059 error = biowait(bp);
2060
2061 if (!error) {
2062 memcpy(component_label, bp->b_un.b_addr,
2063 sizeof(RF_ComponentLabel_t));
2064 #if 0
2065 printf("raidread_component_label: got component label:\n");
2066 printf("Version: %d\n",component_label->version);
2067 printf("Serial Number: %d\n",component_label->serial_number);
2068 printf("Mod counter: %d\n",component_label->mod_counter);
2069 printf("Row: %d\n", component_label->row);
2070 printf("Column: %d\n", component_label->column);
2071 printf("Num Rows: %d\n", component_label->num_rows);
2072 printf("Num Columns: %d\n", component_label->num_columns);
2073 printf("Clean: %d\n", component_label->clean);
2074 printf("Status: %d\n", component_label->status);
2075 #endif
2076 } else {
2077 printf("Failed to read RAID component label!\n");
2078 }
2079
2080 bp->b_flags = B_INVAL | B_AGE;
2081 brelse(bp);
2082 return(error);
2083 }
2084 /* ARGSUSED */
2085 int
2086 raidwrite_component_label(dev, b_vp, component_label)
2087 dev_t dev;
2088 struct vnode *b_vp;
2089 RF_ComponentLabel_t *component_label;
2090 {
2091 struct buf *bp;
2092 int error;
2093
2094 /* get a block of the appropriate size... */
2095 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2096 bp->b_dev = dev;
2097
2098 /* get our ducks in a row for the write */
2099 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2100 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2101 bp->b_flags = B_BUSY | B_WRITE;
2102 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2103
2104 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2105
2106 memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));
2107
2108 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2109 error = biowait(bp);
2110 bp->b_flags = B_INVAL | B_AGE;
2111 brelse(bp);
2112 if (error) {
2113 printf("Failed to write RAID component info!\n");
2114 }
2115
2116 return(error);
2117 }
2118
2119 void
2120 rf_markalldirty( raidPtr )
2121 RF_Raid_t *raidPtr;
2122 {
2123 RF_ComponentLabel_t c_label;
2124 int r,c;
2125
2126 raidPtr->mod_counter++;
2127 for (r = 0; r < raidPtr->numRow; r++) {
2128 for (c = 0; c < raidPtr->numCol; c++) {
2129 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2130 raidread_component_label(
2131 raidPtr->Disks[r][c].dev,
2132 raidPtr->raid_cinfo[r][c].ci_vp,
2133 &c_label);
2134 if (c_label.status == rf_ds_spared) {
2135 /* XXX do something special...
2136 but whatever you do, don't
2137 try to access it!! */
2138 } else {
2139 #if 0
2140 c_label.status =
2141 raidPtr->Disks[r][c].status;
2142 raidwrite_component_label(
2143 raidPtr->Disks[r][c].dev,
2144 raidPtr->raid_cinfo[r][c].ci_vp,
2145 &c_label);
2146 #endif
2147 raidmarkdirty(
2148 raidPtr->Disks[r][c].dev,
2149 raidPtr->raid_cinfo[r][c].ci_vp,
2150 raidPtr->mod_counter);
2151 }
2152 }
2153 }
2154 }
2155 /* printf("Component labels marked dirty.\n"); */
2156 #if 0
2157 for( c = 0; c < raidPtr->numSpare ; c++) {
2158 sparecol = raidPtr->numCol + c;
2159 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2160 /*
2161
2162 XXX this is where we get fancy and map this spare
2163 into it's correct spot in the array.
2164
2165 */
2166 /*
2167
2168 we claim this disk is "optimal" if it's
2169 rf_ds_used_spare, as that means it should be
2170 directly substitutable for the disk it replaced.
2171 We note that too...
2172
2173 */
2174
2175 for(i=0;i<raidPtr->numRow;i++) {
2176 for(j=0;j<raidPtr->numCol;j++) {
2177 if ((raidPtr->Disks[i][j].spareRow ==
2178 r) &&
2179 (raidPtr->Disks[i][j].spareCol ==
2180 sparecol)) {
2181 srow = r;
2182 scol = sparecol;
2183 break;
2184 }
2185 }
2186 }
2187
2188 raidread_component_label(
2189 raidPtr->Disks[r][sparecol].dev,
2190 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2191 &c_label);
2192 /* make sure status is noted */
2193 c_label.version = RF_COMPONENT_LABEL_VERSION;
2194 c_label.mod_counter = raidPtr->mod_counter;
2195 c_label.serial_number = raidPtr->serial_number;
2196 c_label.row = srow;
2197 c_label.column = scol;
2198 c_label.num_rows = raidPtr->numRow;
2199 c_label.num_columns = raidPtr->numCol;
2200 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2201 c_label.status = rf_ds_optimal;
2202 raidwrite_component_label(
2203 raidPtr->Disks[r][sparecol].dev,
2204 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2205 &c_label);
2206 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2207 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2208 }
2209 }
2210
2211 #endif
2212 }
2213
2214
2215 void
2216 rf_update_component_labels( raidPtr )
2217 RF_Raid_t *raidPtr;
2218 {
2219 RF_ComponentLabel_t c_label;
2220 int sparecol;
2221 int r,c;
2222 int i,j;
2223 int srow, scol;
2224
2225 srow = -1;
2226 scol = -1;
2227
2228 /* XXX should do extra checks to make sure things really are clean,
2229 rather than blindly setting the clean bit... */
2230
2231 raidPtr->mod_counter++;
2232
2233 for (r = 0; r < raidPtr->numRow; r++) {
2234 for (c = 0; c < raidPtr->numCol; c++) {
2235 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2236 raidread_component_label(
2237 raidPtr->Disks[r][c].dev,
2238 raidPtr->raid_cinfo[r][c].ci_vp,
2239 &c_label);
2240 /* make sure status is noted */
2241 c_label.status = rf_ds_optimal;
2242 raidwrite_component_label(
2243 raidPtr->Disks[r][c].dev,
2244 raidPtr->raid_cinfo[r][c].ci_vp,
2245 &c_label);
2246 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2247 raidmarkclean(
2248 raidPtr->Disks[r][c].dev,
2249 raidPtr->raid_cinfo[r][c].ci_vp,
2250 raidPtr->mod_counter);
2251 }
2252 }
2253 /* else we don't touch it.. */
2254 #if 0
2255 else if (raidPtr->Disks[r][c].status !=
2256 rf_ds_failed) {
2257 raidread_component_label(
2258 raidPtr->Disks[r][c].dev,
2259 raidPtr->raid_cinfo[r][c].ci_vp,
2260 &c_label);
2261 /* make sure status is noted */
2262 c_label.status =
2263 raidPtr->Disks[r][c].status;
2264 raidwrite_component_label(
2265 raidPtr->Disks[r][c].dev,
2266 raidPtr->raid_cinfo[r][c].ci_vp,
2267 &c_label);
2268 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2269 raidmarkclean(
2270 raidPtr->Disks[r][c].dev,
2271 raidPtr->raid_cinfo[r][c].ci_vp,
2272 raidPtr->mod_counter);
2273 }
2274 }
2275 #endif
2276 }
2277 }
2278
2279 for( c = 0; c < raidPtr->numSpare ; c++) {
2280 sparecol = raidPtr->numCol + c;
2281 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2282 /*
2283
2284 we claim this disk is "optimal" if it's
2285 rf_ds_used_spare, as that means it should be
2286 directly substitutable for the disk it replaced.
2287 We note that too...
2288
2289 */
2290
2291 for(i=0;i<raidPtr->numRow;i++) {
2292 for(j=0;j<raidPtr->numCol;j++) {
2293 if ((raidPtr->Disks[i][j].spareRow ==
2294 0) &&
2295 (raidPtr->Disks[i][j].spareCol ==
2296 sparecol)) {
2297 srow = i;
2298 scol = j;
2299 break;
2300 }
2301 }
2302 }
2303
2304 raidread_component_label(
2305 raidPtr->Disks[0][sparecol].dev,
2306 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2307 &c_label);
2308 /* make sure status is noted */
2309 c_label.version = RF_COMPONENT_LABEL_VERSION;
2310 c_label.mod_counter = raidPtr->mod_counter;
2311 c_label.serial_number = raidPtr->serial_number;
2312 c_label.row = srow;
2313 c_label.column = scol;
2314 c_label.num_rows = raidPtr->numRow;
2315 c_label.num_columns = raidPtr->numCol;
2316 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2317 c_label.status = rf_ds_optimal;
2318 raidwrite_component_label(
2319 raidPtr->Disks[0][sparecol].dev,
2320 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2321 &c_label);
2322 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2323 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2324 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2325 raidPtr->mod_counter);
2326 }
2327 }
2328 }
2329 /* printf("Component labels updated\n"); */
2330 }
2331