rf_netbsdkintf.c revision 1.16.2.7 1 /* $NetBSD: rf_netbsdkintf.c,v 1.16.2.7 1999/09/28 04:47:51 cgd Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
/*
 * Boot-state values for rf_kbooted.  raidattach() stores RFK_BOOT_GOOD
 * once rf_BootRaidframe() has succeeded; raidstrategy() refuses to start
 * I/O in any other state.
 */
#define RFK_BOOT_NONE	0
#define RFK_BOOT_GOOD	1
#define RFK_BOOT_BAD	2

/* Threshold compared against by the dbN_printf() macros when DEBUG is on. */
int rf_kdebug_level = 0;

/* Current boot state; one of the RFK_BOOT_* values above. */
static int rf_kbooted = RFK_BOOT_NONE;
160
/*
 * Debug print helpers.  Each takes one argument: a complete, parenthesised
 * printf argument list, e.g.
 *
 *	db1_printf(("unit %d\n", unit));
 *
 * db0_printf() always prints.  When DEBUG is defined, db_printf() and
 * db1_printf() print if rf_kdebug_level > 0, db2_printf() if it is > 1,
 * and so on up to db5_printf() (> 4).  Without DEBUG they expand to
 * nothing and their arguments are not evaluated.
 *
 * The conditional forms are wrapped in do { } while (0) so that each use
 * is exactly one statement: an unbraced "if (x) db1_printf((...)); else ..."
 * then compiles and binds the else to the outer if in both configurations.
 */
#define db0_printf(a)	printf a

#ifdef DEBUG
#define db_printf(a)	do { if (rf_kdebug_level > 0) printf a; } while (0)
#define db1_printf(a)	do { if (rf_kdebug_level > 0) printf a; } while (0)
#define db2_printf(a)	do { if (rf_kdebug_level > 1) printf a; } while (0)
#define db3_printf(a)	do { if (rf_kdebug_level > 2) printf a; } while (0)
#define db4_printf(a)	do { if (rf_kdebug_level > 3) printf a; } while (0)
#define db5_printf(a)	do { if (rf_kdebug_level > 4) printf a; } while (0)
#else				/* DEBUG */
#define db_printf(a)	do { } while (0)
#define db1_printf(a)	do { } while (0)
#define db2_printf(a)	do { } while (0)
#define db3_printf(a)	do { } while (0)
#define db4_printf(a)	do { } while (0)
#define db5_printf(a)	do { } while (0)
#endif				/* DEBUG */
177
178 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
179
180 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
181
182 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
183 * spare table */
184 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
185 * installation process */
186
187 static struct rf_recon_req *recon_queue = NULL; /* used to communicate
188 * reconstruction
189 * requests */
190
191
192 decl_simple_lock_data(, recon_queue_mutex)
193 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
194 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
195
196 /* prototypes */
197 static void KernelWakeupFunc(struct buf * bp);
198 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
199 dev_t dev, RF_SectorNum_t startSect,
200 RF_SectorCount_t numSect, caddr_t buf,
201 void (*cbFunc) (struct buf *), void *cbArg,
202 int logBytesPerSector, struct proc * b_proc);
203
/*
 * Queue-debug print helpers: call rf_debug_printf() with format string s
 * and 0-3 values when rf_queueDebug is non-zero.  rf_debug_printf() is
 * always passed eight value arguments, so the unused ones are filled
 * with NULL.
 *
 * Each macro is wrapped in do { } while (0) so that it behaves as a single
 * statement; with a bare "if" a following "else" in the caller would
 * silently attach to the macro's own test.
 */
#define Dprintf0(s) \
	do { \
		if (rf_queueDebug) \
			rf_debug_printf(s, NULL, NULL, NULL, NULL, \
			    NULL, NULL, NULL, NULL); \
	} while (0)
#define Dprintf1(s, a) \
	do { \
		if (rf_queueDebug) \
			rf_debug_printf(s, a, NULL, NULL, NULL, \
			    NULL, NULL, NULL, NULL); \
	} while (0)
#define Dprintf2(s, a, b) \
	do { \
		if (rf_queueDebug) \
			rf_debug_printf(s, a, b, NULL, NULL, \
			    NULL, NULL, NULL, NULL); \
	} while (0)
#define Dprintf3(s, a, b, c) \
	do { \
		if (rf_queueDebug) \
			rf_debug_printf(s, a, b, c, NULL, \
			    NULL, NULL, NULL, NULL); \
	} while (0)
212
213 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
214 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
215
216 void raidattach __P((int));
217 int raidsize __P((dev_t));
218
219 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
220 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
221 static int raidinit __P((dev_t, RF_Raid_t *, int));
222
223 int raidopen __P((dev_t, int, int, struct proc *));
224 int raidclose __P((dev_t, int, int, struct proc *));
225 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
226 int raidwrite __P((dev_t, struct uio *, int));
227 int raidread __P((dev_t, struct uio *, int));
228 void raidstrategy __P((struct buf *));
229 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
230
231 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
232 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
233 void rf_update_component_labels( RF_Raid_t *);
234 /*
235 * Pilfered from ccd.c
236 */
237
238 struct raidbuf {
239 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
240 struct buf *rf_obp; /* ptr. to original I/O buf */
241 int rf_flags; /* misc. flags */
242 RF_DiskQueueData_t *req;/* the request that this was part of.. */
243 };
244
245
246 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
247 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
248
249 /* XXX Not sure if the following should be replacing the raidPtrs above,
250 or if it should be used in conjunction with that... */
251
252 struct raid_softc {
253 int sc_flags; /* flags */
254 int sc_cflags; /* configuration flags */
255 size_t sc_size; /* size of the raid device */
256 dev_t sc_dev; /* our device.. */
257 char sc_xname[20]; /* XXX external name */
258 struct disk sc_dkdev; /* generic disk device info */
259 struct pool sc_cbufpool; /* component buffer pool */
260 };
261 /* sc_flags */
262 #define RAIDF_INITED 0x01 /* unit has been initialized */
263 #define RAIDF_WLABEL 0x02 /* label area is writable */
264 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
265 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
266 #define RAIDF_LOCKED 0x80 /* unit is locked */
267
/* Map a dev_t to its RAID unit number. */
#define raidunit(x)	DISKUNIT(x)

/* Number of usable units; stays 0 until raidattach() has set things up. */
static int numraid = 0;

/*
 * RAIDOUTSTANDING is the number of I/Os allowed to be in flight at once
 * on a single RAID device.  Large values let the driver eat a lot of
 * kernel memory, particularly for writes.  It may be overridden at build
 * time.
 */
#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING	10
#endif

/* Device number of the raw partition on the same major/unit as dev. */
#define RAIDLABELDEV(dev) \
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* Softc array.  Deliberately not static: made public for the KVM stuff. */
struct raid_softc *raid_softc;
286
287 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
288 struct disklabel *));
289 static void raidgetdisklabel __P((dev_t));
290 static void raidmakedisklabel __P((struct raid_softc *));
291
292 static int raidlock __P((struct raid_softc *));
293 static void raidunlock __P((struct raid_softc *));
294 int raidlookup __P((char *, struct proc * p, struct vnode **));
295
296 static void rf_markalldirty __P((RF_Raid_t *));
297
298 void
299 raidattach(num)
300 int num;
301 {
302 int raidID;
303 int i, rc;
304
305 #ifdef DEBUG
306 printf("raidattach: Asked for %d units\n", num);
307 #endif
308
309 if (num <= 0) {
310 #ifdef DIAGNOSTIC
311 panic("raidattach: count <= 0");
312 #endif
313 return;
314 }
315 /* This is where all the initialization stuff gets done. */
316
317 /* Make some space for requested number of units... */
318
319 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
320 if (raidPtrs == NULL) {
321 panic("raidPtrs is NULL!!\n");
322 }
323
324 rc = rf_mutex_init(&rf_sparet_wait_mutex);
325 if (rc) {
326 RF_PANIC();
327 }
328
329 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
330 recon_queue = NULL;
331
332 for (i = 0; i < numraid; i++)
333 raidPtrs[i] = NULL;
334 rc = rf_BootRaidframe();
335 if (rc == 0)
336 printf("Kernelized RAIDframe activated\n");
337 else
338 panic("Serious error booting RAID!!\n");
339
340 rf_kbooted = RFK_BOOT_GOOD;
341
342 /* put together some datastructures like the CCD device does.. This
343 * lets us lock the device and what-not when it gets opened. */
344
345 raid_softc = (struct raid_softc *)
346 malloc(num * sizeof(struct raid_softc),
347 M_RAIDFRAME, M_NOWAIT);
348 if (raid_softc == NULL) {
349 printf("WARNING: no memory for RAIDframe driver\n");
350 return;
351 }
352 numraid = num;
353 bzero(raid_softc, num * sizeof(struct raid_softc));
354
355 for (raidID = 0; raidID < num; raidID++) {
356 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
357 (RF_Raid_t *));
358 if (raidPtrs[raidID] == NULL) {
359 printf("raidPtrs[%d] is NULL\n", raidID);
360 }
361 }
362 }
363
364
365 int
366 raidsize(dev)
367 dev_t dev;
368 {
369 struct raid_softc *rs;
370 struct disklabel *lp;
371 int part, unit, omask, size;
372
373 unit = raidunit(dev);
374 if (unit >= numraid)
375 return (-1);
376 rs = &raid_softc[unit];
377
378 if ((rs->sc_flags & RAIDF_INITED) == 0)
379 return (-1);
380
381 part = DISKPART(dev);
382 omask = rs->sc_dkdev.dk_openmask & (1 << part);
383 lp = rs->sc_dkdev.dk_label;
384
385 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
386 return (-1);
387
388 if (lp->d_partitions[part].p_fstype != FS_SWAP)
389 size = -1;
390 else
391 size = lp->d_partitions[part].p_size *
392 (lp->d_secsize / DEV_BSIZE);
393
394 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
395 return (-1);
396
397 return (size);
398
399 }
400
401 int
402 raiddump(dev, blkno, va, size)
403 dev_t dev;
404 daddr_t blkno;
405 caddr_t va;
406 size_t size;
407 {
408 /* Not implemented. */
409 return ENXIO;
410 }
411 /* ARGSUSED */
412 int
413 raidopen(dev, flags, fmt, p)
414 dev_t dev;
415 int flags, fmt;
416 struct proc *p;
417 {
418 int unit = raidunit(dev);
419 struct raid_softc *rs;
420 struct disklabel *lp;
421 int part, pmask;
422 int error = 0;
423
424 if (unit >= numraid)
425 return (ENXIO);
426 rs = &raid_softc[unit];
427
428 if ((error = raidlock(rs)) != 0)
429 return (error);
430 lp = rs->sc_dkdev.dk_label;
431
432 part = DISKPART(dev);
433 pmask = (1 << part);
434
435 db1_printf(("Opening raid device number: %d partition: %d\n",
436 unit, part));
437
438
439 if ((rs->sc_flags & RAIDF_INITED) &&
440 (rs->sc_dkdev.dk_openmask == 0))
441 raidgetdisklabel(dev);
442
443 /* make sure that this partition exists */
444
445 if (part != RAW_PART) {
446 db1_printf(("Not a raw partition..\n"));
447 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
448 ((part >= lp->d_npartitions) ||
449 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
450 error = ENXIO;
451 raidunlock(rs);
452 db1_printf(("Bailing out...\n"));
453 return (error);
454 }
455 }
456 /* Prevent this unit from being unconfigured while open. */
457 switch (fmt) {
458 case S_IFCHR:
459 rs->sc_dkdev.dk_copenmask |= pmask;
460 break;
461
462 case S_IFBLK:
463 rs->sc_dkdev.dk_bopenmask |= pmask;
464 break;
465 }
466
467 if ((rs->sc_dkdev.dk_openmask == 0) &&
468 ((rs->sc_flags & RAIDF_INITED) != 0)) {
469 /* First one... mark things as dirty... Note that we *MUST*
470 have done a configure before this. I DO NOT WANT TO BE
471 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
472 THAT THEY BELONG TOGETHER!!!!! */
473 /* XXX should check to see if we're only open for reading
474 here... If so, we needn't do this, but then need some
475 other way of keeping track of what's happened.. */
476
477 rf_markalldirty( raidPtrs[unit] );
478 }
479
480
481 rs->sc_dkdev.dk_openmask =
482 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
483
484 raidunlock(rs);
485
486 return (error);
487
488
489 }
490 /* ARGSUSED */
491 int
492 raidclose(dev, flags, fmt, p)
493 dev_t dev;
494 int flags, fmt;
495 struct proc *p;
496 {
497 int unit = raidunit(dev);
498 struct raid_softc *rs;
499 int error = 0;
500 int part;
501
502 if (unit >= numraid)
503 return (ENXIO);
504 rs = &raid_softc[unit];
505
506 if ((error = raidlock(rs)) != 0)
507 return (error);
508
509 part = DISKPART(dev);
510
511 /* ...that much closer to allowing unconfiguration... */
512 switch (fmt) {
513 case S_IFCHR:
514 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
515 break;
516
517 case S_IFBLK:
518 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
519 break;
520 }
521 rs->sc_dkdev.dk_openmask =
522 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
523
524 if ((rs->sc_dkdev.dk_openmask == 0) &&
525 ((rs->sc_flags & RAIDF_INITED) != 0)) {
526 /* Last one... device is not unconfigured yet.
527 Device shutdown has taken care of setting the
528 clean bits if RAIDF_INITED is not set
529 mark things as clean... */
530 rf_update_component_labels( raidPtrs[unit] );
531 }
532
533 raidunlock(rs);
534 return (0);
535
536 }
537
538 void
539 raidstrategy(bp)
540 register struct buf *bp;
541 {
542 register int s;
543
544 unsigned int raidID = raidunit(bp->b_dev);
545 RF_Raid_t *raidPtr;
546 struct raid_softc *rs = &raid_softc[raidID];
547 struct disklabel *lp;
548 int wlabel;
549
550 #if 0
551 db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
552 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
553 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
554 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
555 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
556
557 if (bp->b_flags & B_READ)
558 db1_printf(("READ\n"));
559 else
560 db1_printf(("WRITE\n"));
561 #endif
562 if (rf_kbooted != RFK_BOOT_GOOD)
563 return;
564 if (raidID >= numraid || !raidPtrs[raidID]) {
565 bp->b_error = ENODEV;
566 bp->b_flags |= B_ERROR;
567 bp->b_resid = bp->b_bcount;
568 biodone(bp);
569 return;
570 }
571 raidPtr = raidPtrs[raidID];
572 if (!raidPtr->valid) {
573 bp->b_error = ENODEV;
574 bp->b_flags |= B_ERROR;
575 bp->b_resid = bp->b_bcount;
576 biodone(bp);
577 return;
578 }
579 if (bp->b_bcount == 0) {
580 db1_printf(("b_bcount is zero..\n"));
581 biodone(bp);
582 return;
583 }
584 lp = rs->sc_dkdev.dk_label;
585
586 /*
587 * Do bounds checking and adjust transfer. If there's an
588 * error, the bounds check will flag that for us.
589 */
590
591 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
592 if (DISKPART(bp->b_dev) != RAW_PART)
593 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
594 db1_printf(("Bounds check failed!!:%d %d\n",
595 (int) bp->b_blkno, (int) wlabel));
596 biodone(bp);
597 return;
598 }
599 s = splbio(); /* XXX Needed? */
600 db1_printf(("Beginning strategy...\n"));
601
602 bp->b_resid = 0;
603 bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
604 NULL, NULL, NULL);
605 if (bp->b_error) {
606 bp->b_flags |= B_ERROR;
607 db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
608 bp->b_error));
609 }
610 splx(s);
611 #if 0
612 db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
613 bp, bp->b_data,
614 (int) bp->b_bcount, (int) bp->b_resid));
615 #endif
616 }
617 /* ARGSUSED */
618 int
619 raidread(dev, uio, flags)
620 dev_t dev;
621 struct uio *uio;
622 int flags;
623 {
624 int unit = raidunit(dev);
625 struct raid_softc *rs;
626 int part;
627
628 if (unit >= numraid)
629 return (ENXIO);
630 rs = &raid_softc[unit];
631
632 if ((rs->sc_flags & RAIDF_INITED) == 0)
633 return (ENXIO);
634 part = DISKPART(dev);
635
636 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
637
638 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
639
640 }
641 /* ARGSUSED */
642 int
643 raidwrite(dev, uio, flags)
644 dev_t dev;
645 struct uio *uio;
646 int flags;
647 {
648 int unit = raidunit(dev);
649 struct raid_softc *rs;
650
651 if (unit >= numraid)
652 return (ENXIO);
653 rs = &raid_softc[unit];
654
655 if ((rs->sc_flags & RAIDF_INITED) == 0)
656 return (ENXIO);
657 db1_printf(("raidwrite\n"));
658 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
659
660 }
661
662 int
663 raidioctl(dev, cmd, data, flag, p)
664 dev_t dev;
665 u_long cmd;
666 caddr_t data;
667 int flag;
668 struct proc *p;
669 {
670 int unit = raidunit(dev);
671 int error = 0;
672 int part, pmask;
673 struct raid_softc *rs;
674 #if 0
675 int r, c;
676 #endif
677 /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
678
679 /* struct ccdbuf *cbp; */
680 /* struct raidbuf *raidbp; */
681 RF_Config_t *k_cfg, *u_cfg;
682 u_char *specific_buf;
683 int retcode = 0;
684 int row;
685 int column;
686 int s;
687 struct rf_recon_req *rrcopy, *rr;
688 RF_ComponentLabel_t *component_label;
689 RF_ComponentLabel_t ci_label;
690 RF_ComponentLabel_t **c_label_ptr;
691 RF_SingleComponent_t *sparePtr,*componentPtr;
692 RF_SingleComponent_t hot_spare;
693 RF_SingleComponent_t component;
694
695 if (unit >= numraid)
696 return (ENXIO);
697 rs = &raid_softc[unit];
698
699 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
700 (int) DISKPART(dev), (int) unit, (int) cmd));
701
702 /* Must be open for writes for these commands... */
703 switch (cmd) {
704 case DIOCSDINFO:
705 case DIOCWDINFO:
706 case DIOCWLABEL:
707 if ((flag & FWRITE) == 0)
708 return (EBADF);
709 }
710
711 /* Must be initialized for these... */
712 switch (cmd) {
713 case DIOCGDINFO:
714 case DIOCSDINFO:
715 case DIOCWDINFO:
716 case DIOCGPART:
717 case DIOCWLABEL:
718 case DIOCGDEFLABEL:
719 case RAIDFRAME_SHUTDOWN:
720 case RAIDFRAME_REWRITEPARITY:
721 case RAIDFRAME_GET_INFO:
722 case RAIDFRAME_RESET_ACCTOTALS:
723 case RAIDFRAME_GET_ACCTOTALS:
724 case RAIDFRAME_KEEP_ACCTOTALS:
725 case RAIDFRAME_GET_SIZE:
726 case RAIDFRAME_FAIL_DISK:
727 case RAIDFRAME_COPYBACK:
728 case RAIDFRAME_CHECKRECON:
729 case RAIDFRAME_GET_COMPONENT_LABEL:
730 case RAIDFRAME_SET_COMPONENT_LABEL:
731 case RAIDFRAME_ADD_HOT_SPARE:
732 case RAIDFRAME_REMOVE_HOT_SPARE:
733 case RAIDFRAME_INIT_LABELS:
734 case RAIDFRAME_REBUILD_IN_PLACE:
735 if ((rs->sc_flags & RAIDF_INITED) == 0)
736 return (ENXIO);
737 }
738
739 switch (cmd) {
740
741
742 /* configure the system */
743 case RAIDFRAME_CONFIGURE:
744
745 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
746 /* copy-in the configuration information */
747 /* data points to a pointer to the configuration structure */
748 u_cfg = *((RF_Config_t **) data);
749 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
750 if (k_cfg == NULL) {
751 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
752 return (ENOMEM);
753 }
754 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
755 sizeof(RF_Config_t));
756 if (retcode) {
757 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
758 retcode));
759 return (retcode);
760 }
761 /* allocate a buffer for the layout-specific data, and copy it
762 * in */
763 if (k_cfg->layoutSpecificSize) {
764 if (k_cfg->layoutSpecificSize > 10000) {
765 /* sanity check */
766 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
767 return (EINVAL);
768 }
769 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
770 (u_char *));
771 if (specific_buf == NULL) {
772 RF_Free(k_cfg, sizeof(RF_Config_t));
773 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
774 return (ENOMEM);
775 }
776 retcode = copyin(k_cfg->layoutSpecific,
777 (caddr_t) specific_buf,
778 k_cfg->layoutSpecificSize);
779 if (retcode) {
780 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
781 retcode));
782 return (retcode);
783 }
784 } else
785 specific_buf = NULL;
786 k_cfg->layoutSpecific = specific_buf;
787
788 /* should do some kind of sanity check on the configuration.
789 * Store the sum of all the bytes in the last byte? */
790
791 #if 0
792 db1_printf(("Considering configuring the system.:%d 0x%x\n",
793 unit, p));
794 #endif
795
796 /* We need the pointer to this a little deeper, so stash it
797 * here... */
798
799 raidPtrs[unit]->proc = p;
800
801 /* configure the system */
802
803 raidPtrs[unit]->raidid = unit;
804
805 retcode = rf_Configure(raidPtrs[unit], k_cfg);
806
807 /* allow this many simultaneous IO's to this RAID device */
808 raidPtrs[unit]->openings = RAIDOUTSTANDING;
809
810 if (retcode == 0) {
811 retcode = raidinit(dev, raidPtrs[unit], unit);
812 rf_markalldirty( raidPtrs[unit] );
813 }
814 /* free the buffers. No return code here. */
815 if (k_cfg->layoutSpecificSize) {
816 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
817 }
818 RF_Free(k_cfg, sizeof(RF_Config_t));
819
820 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
821 retcode));
822
823 return (retcode);
824
825 /* shutdown the system */
826 case RAIDFRAME_SHUTDOWN:
827
828 if ((error = raidlock(rs)) != 0)
829 return (error);
830
831 /*
832 * If somebody has a partition mounted, we shouldn't
833 * shutdown.
834 */
835
836 part = DISKPART(dev);
837 pmask = (1 << part);
838 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
839 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
840 (rs->sc_dkdev.dk_copenmask & pmask))) {
841 raidunlock(rs);
842 return (EBUSY);
843 }
844
845 if (rf_debugKernelAccess) {
846 printf("call shutdown\n");
847 }
848 raidPtrs[unit]->proc = p; /* XXX necessary evil */
849
850 retcode = rf_Shutdown(raidPtrs[unit]);
851
852 db1_printf(("Done main shutdown\n"));
853
854 pool_destroy(&rs->sc_cbufpool);
855 db1_printf(("Done freeing component buffer freelist\n"));
856
857 /* It's no longer initialized... */
858 rs->sc_flags &= ~RAIDF_INITED;
859
860 /* Detach the disk. */
861 disk_detach(&rs->sc_dkdev);
862
863 raidunlock(rs);
864
865 return (retcode);
866 case RAIDFRAME_GET_COMPONENT_LABEL:
867 c_label_ptr = (RF_ComponentLabel_t **) data;
868 /* need to read the component label for the disk indicated
869 by row,column in component_label
870 XXX need to sanity check these values!!!
871 */
872
873 /* For practice, let's get it directly fromdisk, rather
874 than from the in-core copy */
875 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
876 (RF_ComponentLabel_t *));
877 if (component_label == NULL)
878 return (ENOMEM);
879
880 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
881
882 retcode = copyin( *c_label_ptr, component_label,
883 sizeof(RF_ComponentLabel_t));
884
885 if (retcode) {
886 return(retcode);
887 }
888
889 row = component_label->row;
890 column = component_label->column;
891
892 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
893 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
894 return(EINVAL);
895 }
896
897 raidread_component_label(
898 raidPtrs[unit]->Disks[row][column].dev,
899 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
900 component_label );
901
902 retcode = copyout((caddr_t) component_label,
903 (caddr_t) *c_label_ptr,
904 sizeof(RF_ComponentLabel_t));
905 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
906 return (retcode);
907
908 case RAIDFRAME_SET_COMPONENT_LABEL:
909 component_label = (RF_ComponentLabel_t *) data;
910
911 /* XXX check the label for valid stuff... */
912 /* Note that some things *should not* get modified --
913 the user should be re-initing the labels instead of
914 trying to patch things.
915 */
916
917 printf("Got component label:\n");
918 printf("Version: %d\n",component_label->version);
919 printf("Serial Number: %d\n",component_label->serial_number);
920 printf("Mod counter: %d\n",component_label->mod_counter);
921 printf("Row: %d\n", component_label->row);
922 printf("Column: %d\n", component_label->column);
923 printf("Num Rows: %d\n", component_label->num_rows);
924 printf("Num Columns: %d\n", component_label->num_columns);
925 printf("Clean: %d\n", component_label->clean);
926 printf("Status: %d\n", component_label->status);
927
928 row = component_label->row;
929 column = component_label->column;
930
931 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
932 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
933 return(EINVAL);
934 }
935
936 /* XXX this isn't allowed to do anything for now :-) */
937 #if 0
938 raidwrite_component_label(
939 raidPtrs[unit]->Disks[row][column].dev,
940 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
941 component_label );
942 #endif
943 return (0);
944
945 case RAIDFRAME_INIT_LABELS:
946 component_label = (RF_ComponentLabel_t *) data;
947 /*
948 we only want the serial number from
949 the above. We get all the rest of the information
950 from the config that was used to create this RAID
951 set.
952 */
953
954 raidPtrs[unit]->serial_number = component_label->serial_number;
955 /* current version number */
956 ci_label.version = RF_COMPONENT_LABEL_VERSION;
957 ci_label.serial_number = component_label->serial_number;
958 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
959 ci_label.num_rows = raidPtrs[unit]->numRow;
960 ci_label.num_columns = raidPtrs[unit]->numCol;
961 ci_label.clean = RF_RAID_DIRTY; /* not clean */
962 ci_label.status = rf_ds_optimal; /* "It's good!" */
963
964 for(row=0;row<raidPtrs[unit]->numRow;row++) {
965 ci_label.row = row;
966 for(column=0;column<raidPtrs[unit]->numCol;column++) {
967 ci_label.column = column;
968 raidwrite_component_label(
969 raidPtrs[unit]->Disks[row][column].dev,
970 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
971 &ci_label );
972 }
973 }
974
975 return (retcode);
976
977 /* initialize all parity */
978 case RAIDFRAME_REWRITEPARITY:
979
980 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
981 /* Parity for RAID 0 is trivially correct */
982 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
983 return(0);
984 }
985
986 /* borrow the thread of the requesting process */
987 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
988 retcode = rf_RewriteParity(raidPtrs[unit]);
989 /* return I/O Error if the parity rewrite fails */
990
991 if (retcode) {
992 retcode = EIO;
993 } else {
994 /* set the clean bit! If we shutdown correctly,
995 the clean bit on each component label will get
996 set */
997 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
998 }
999 return (retcode);
1000
1001
1002 case RAIDFRAME_ADD_HOT_SPARE:
1003 sparePtr = (RF_SingleComponent_t *) data;
1004 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1005 printf("Adding spare\n");
1006 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1007 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
1008 return(retcode);
1009
1010 case RAIDFRAME_REMOVE_HOT_SPARE:
1011 return(retcode);
1012
1013 case RAIDFRAME_REBUILD_IN_PLACE:
1014
1015 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1016 /* Can't do this on a RAID 0!! */
1017 return(EINVAL);
1018 }
1019
1020 componentPtr = (RF_SingleComponent_t *) data;
1021 memcpy( &component, componentPtr,
1022 sizeof(RF_SingleComponent_t));
1023 row = component.row;
1024 column = component.column;
1025 printf("Rebuild: %d %d\n",row, column);
1026 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
1027 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
1028 return(EINVAL);
1029 }
1030 printf("Attempting a rebuild in place\n");
1031 s = splbio();
1032 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1033 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1034 splx(s);
1035 return(retcode);
1036
1037 /* issue a test-unit-ready through raidframe to the indicated
1038 * device */
1039 #if 0 /* XXX not supported yet (ever?) */
1040 case RAIDFRAME_TUR:
1041 /* debug only */
1042 retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
1043 return (retcode);
1044 #endif
1045 case RAIDFRAME_GET_INFO:
1046 {
1047 RF_Raid_t *raid = raidPtrs[unit];
1048 RF_DeviceConfig_t *cfg, **ucfgp;
1049 int i, j, d;
1050
1051 if (!raid->valid)
1052 return (ENODEV);
1053 ucfgp = (RF_DeviceConfig_t **) data;
1054 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1055 (RF_DeviceConfig_t *));
1056 if (cfg == NULL)
1057 return (ENOMEM);
1058 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1059 cfg->rows = raid->numRow;
1060 cfg->cols = raid->numCol;
1061 cfg->ndevs = raid->numRow * raid->numCol;
1062 if (cfg->ndevs >= RF_MAX_DISKS) {
1063 cfg->ndevs = 0;
1064 return (ENOMEM);
1065 }
1066 cfg->nspares = raid->numSpare;
1067 if (cfg->nspares >= RF_MAX_DISKS) {
1068 cfg->nspares = 0;
1069 return (ENOMEM);
1070 }
1071 cfg->maxqdepth = raid->maxQueueDepth;
1072 d = 0;
1073 for (i = 0; i < cfg->rows; i++) {
1074 for (j = 0; j < cfg->cols; j++) {
1075 cfg->devs[d] = raid->Disks[i][j];
1076 d++;
1077 }
1078 }
1079 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1080 cfg->spares[i] = raid->Disks[0][j];
1081 }
1082 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1083 sizeof(RF_DeviceConfig_t));
1084 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1085
1086 return (retcode);
1087 }
1088 break;
1089
1090 case RAIDFRAME_RESET_ACCTOTALS:
1091 {
1092 RF_Raid_t *raid = raidPtrs[unit];
1093
1094 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1095 return (0);
1096 }
1097 break;
1098
1099 case RAIDFRAME_GET_ACCTOTALS:
1100 {
1101 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1102 RF_Raid_t *raid = raidPtrs[unit];
1103
1104 *totals = raid->acc_totals;
1105 return (0);
1106 }
1107 break;
1108
1109 case RAIDFRAME_KEEP_ACCTOTALS:
1110 {
1111 RF_Raid_t *raid = raidPtrs[unit];
1112 int *keep = (int *) data;
1113
1114 raid->keep_acc_totals = *keep;
1115 return (0);
1116 }
1117 break;
1118
1119 case RAIDFRAME_GET_SIZE:
1120 *(int *) data = raidPtrs[unit]->totalSectors;
1121 return (0);
1122
1123 #define RAIDFRAME_RECON 1
1124 /* XXX The above should probably be set somewhere else!! GO */
1125 #if RAIDFRAME_RECON > 0
1126
1127 /* fail a disk & optionally start reconstruction */
1128 case RAIDFRAME_FAIL_DISK:
1129
1130 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1131 /* Can't do this on a RAID 0!! */
1132 return(EINVAL);
1133 }
1134
1135 rr = (struct rf_recon_req *) data;
1136
1137 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1138 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1139 return (EINVAL);
1140
1141 printf("raid%d: Failing the disk: row: %d col: %d\n",
1142 unit, rr->row, rr->col);
1143
1144 /* make a copy of the recon request so that we don't rely on
1145 * the user's buffer */
1146 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1147 bcopy(rr, rrcopy, sizeof(*rr));
1148 rrcopy->raidPtr = (void *) raidPtrs[unit];
1149
1150 LOCK_RECON_Q_MUTEX();
1151 rrcopy->next = recon_queue;
1152 recon_queue = rrcopy;
1153 wakeup(&recon_queue);
1154 UNLOCK_RECON_Q_MUTEX();
1155
1156 return (0);
1157
1158 /* invoke a copyback operation after recon on whatever disk
1159 * needs it, if any */
1160 case RAIDFRAME_COPYBACK:
1161
1162 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1163 /* This makes no sense on a RAID 0!! */
1164 return(EINVAL);
1165 }
1166
1167 /* borrow the current thread to get this done */
1168 raidPtrs[unit]->proc = p; /* ICK.. but needed :-p GO */
1169 s = splbio();
1170 rf_CopybackReconstructedData(raidPtrs[unit]);
1171 splx(s);
1172 return (0);
1173
1174 /* return the percentage completion of reconstruction */
1175 case RAIDFRAME_CHECKRECON:
1176 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1177 /* This makes no sense on a RAID 0 */
1178 return(EINVAL);
1179 }
1180
1181 row = *(int *) data;
1182 if (row < 0 || row >= raidPtrs[unit]->numRow)
1183 return (EINVAL);
1184 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1185 *(int *) data = 100;
1186 else
1187 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1188 return (0);
1189
1190 /* the sparetable daemon calls this to wait for the kernel to
1191 * need a spare table. this ioctl does not return until a
1192 * spare table is needed. XXX -- calling mpsleep here in the
1193 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1194 * -- I should either compute the spare table in the kernel,
1195 * or have a different -- XXX XXX -- interface (a different
1196 * character device) for delivering the table -- XXX */
1197 #if 0
1198 case RAIDFRAME_SPARET_WAIT:
1199 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1200 while (!rf_sparet_wait_queue)
1201 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1202 waitreq = rf_sparet_wait_queue;
1203 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1204 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1205
1206 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1207
1208 RF_Free(waitreq, sizeof(*waitreq));
1209 return (0);
1210
1211
1212 /* wakes up a process waiting on SPARET_WAIT and puts an error
1213 * code in it that will cause the dameon to exit */
1214 case RAIDFRAME_ABORT_SPARET_WAIT:
1215 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1216 waitreq->fcol = -1;
1217 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1218 waitreq->next = rf_sparet_wait_queue;
1219 rf_sparet_wait_queue = waitreq;
1220 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1221 wakeup(&rf_sparet_wait_queue);
1222 return (0);
1223
1224 /* used by the spare table daemon to deliver a spare table
1225 * into the kernel */
1226 case RAIDFRAME_SEND_SPARET:
1227
1228 /* install the spare table */
1229 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1230
1231 /* respond to the requestor. the return status of the spare
1232 * table installation is passed in the "fcol" field */
1233 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1234 waitreq->fcol = retcode;
1235 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1236 waitreq->next = rf_sparet_resp_queue;
1237 rf_sparet_resp_queue = waitreq;
1238 wakeup(&rf_sparet_resp_queue);
1239 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1240
1241 return (retcode);
1242 #endif
1243
1244
1245 #endif /* RAIDFRAME_RECON > 0 */
1246
1247 default:
1248 break; /* fall through to the os-specific code below */
1249
1250 }
1251
1252 if (!raidPtrs[unit]->valid)
1253 return (EINVAL);
1254
1255 /*
1256 * Add support for "regular" device ioctls here.
1257 */
1258
1259 switch (cmd) {
1260 case DIOCGDINFO:
1261 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1262 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1263 break;
1264
1265 case DIOCGPART:
1266 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1267 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1268 ((struct partinfo *) data)->part =
1269 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1270 break;
1271
1272 case DIOCWDINFO:
1273 db1_printf(("DIOCWDINFO\n"));
1274 case DIOCSDINFO:
1275 db1_printf(("DIOCSDINFO\n"));
1276 if ((error = raidlock(rs)) != 0)
1277 return (error);
1278
1279 rs->sc_flags |= RAIDF_LABELLING;
1280
1281 error = setdisklabel(rs->sc_dkdev.dk_label,
1282 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1283 if (error == 0) {
1284 if (cmd == DIOCWDINFO)
1285 error = writedisklabel(RAIDLABELDEV(dev),
1286 raidstrategy, rs->sc_dkdev.dk_label,
1287 rs->sc_dkdev.dk_cpulabel);
1288 }
1289 rs->sc_flags &= ~RAIDF_LABELLING;
1290
1291 raidunlock(rs);
1292
1293 if (error)
1294 return (error);
1295 break;
1296
1297 case DIOCWLABEL:
1298 db1_printf(("DIOCWLABEL\n"));
1299 if (*(int *) data != 0)
1300 rs->sc_flags |= RAIDF_WLABEL;
1301 else
1302 rs->sc_flags &= ~RAIDF_WLABEL;
1303 break;
1304
1305 case DIOCGDEFLABEL:
1306 db1_printf(("DIOCGDEFLABEL\n"));
1307 raidgetdefaultlabel(raidPtrs[unit], rs,
1308 (struct disklabel *) data);
1309 break;
1310
1311 default:
1312 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1313 }
1314 return (retcode);
1315
1316 }
1317
1318
1319 /* raidinit -- complete the rest of the initialization for the
1320 RAIDframe device. */
1321
1322
1323 static int
1324 raidinit(dev, raidPtr, unit)
1325 dev_t dev;
1326 RF_Raid_t *raidPtr;
1327 int unit;
1328 {
1329 int retcode;
1330 /* int ix; */
1331 /* struct raidbuf *raidbp; */
1332 struct raid_softc *rs;
1333
1334 retcode = 0;
1335
1336 rs = &raid_softc[unit];
1337 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1338 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1339
1340
1341 /* XXX should check return code first... */
1342 rs->sc_flags |= RAIDF_INITED;
1343
1344 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1345
1346 rs->sc_dkdev.dk_name = rs->sc_xname;
1347
1348 /* disk_attach actually creates space for the CPU disklabel, among
1349 * other things, so it's critical to call this *BEFORE* we try putzing
1350 * with disklabels. */
1351
1352 disk_attach(&rs->sc_dkdev);
1353
1354 /* XXX There may be a weird interaction here between this, and
1355 * protectedSectors, as used in RAIDframe. */
1356
1357 rs->sc_size = raidPtr->totalSectors;
1358 rs->sc_dev = dev;
1359
1360 return (retcode);
1361 }
1362
1363 /*
1364 * This kernel thread never exits. It is created once, and persists
1365 * until the system reboots.
1366 */
1367
1368 void
1369 rf_ReconKernelThread()
1370 {
1371 struct rf_recon_req *req;
1372 int s;
1373
1374 /* XXX not sure what spl() level we should be at here... probably
1375 * splbio() */
1376 s = splbio();
1377
1378 while (1) {
1379 /* grab the next reconstruction request from the queue */
1380 LOCK_RECON_Q_MUTEX();
1381 while (!recon_queue) {
1382 UNLOCK_RECON_Q_MUTEX();
1383 tsleep(&recon_queue, PRIBIO,
1384 "raidframe recon", 0);
1385 LOCK_RECON_Q_MUTEX();
1386 }
1387 req = recon_queue;
1388 recon_queue = recon_queue->next;
1389 UNLOCK_RECON_Q_MUTEX();
1390
1391 /*
1392 * If flags specifies that we should start recon, this call
1393 * will not return until reconstruction completes, fails,
1394 * or is aborted.
1395 */
1396 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
1397 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
1398
1399 RF_Free(req, sizeof(*req));
1400 }
1401 }
1402 /* wake up the daemon & tell it to get us a spare table
1403 * XXX
1404 * the entries in the queues should be tagged with the raidPtr
1405 * so that in the extremely rare case that two recons happen at once,
1406 * we know for which device were requesting a spare table
1407 * XXX
1408 */
1409 int
1410 rf_GetSpareTableFromDaemon(req)
1411 RF_SparetWait_t *req;
1412 {
1413 int retcode;
1414
1415 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1416 req->next = rf_sparet_wait_queue;
1417 rf_sparet_wait_queue = req;
1418 wakeup(&rf_sparet_wait_queue);
1419
1420 /* mpsleep unlocks the mutex */
1421 while (!rf_sparet_resp_queue) {
1422 tsleep(&rf_sparet_resp_queue, PRIBIO,
1423 "raidframe getsparetable", 0);
1424 #if 0
1425 mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
1426 (void *) simple_lock_addr(rf_sparet_wait_mutex),
1427 MS_LOCK_SIMPLE);
1428 #endif
1429 }
1430 req = rf_sparet_resp_queue;
1431 rf_sparet_resp_queue = req->next;
1432 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1433
1434 retcode = req->fcol;
1435 RF_Free(req, sizeof(*req)); /* this is not the same req as we
1436 * alloc'd */
1437 return (retcode);
1438 }
1439 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1440 * bp & passes it down.
1441 * any calls originating in the kernel must use non-blocking I/O
1442 * do some extra sanity checking to return "appropriate" error values for
1443 * certain conditions (to make some standard utilities work)
1444 */
1445 int
1446 rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
1447 RF_Raid_t *raidPtr;
1448 struct buf *bp;
1449 RF_RaidAccessFlags_t flags;
1450 void (*cbFunc) (struct buf *);
1451 void *cbArg;
1452 {
1453 RF_SectorCount_t num_blocks, pb, sum;
1454 RF_RaidAddr_t raid_addr;
1455 int retcode;
1456 struct partition *pp;
1457 daddr_t blocknum;
1458 int unit;
1459 struct raid_softc *rs;
1460 int do_async;
1461
1462 /* XXX The dev_t used here should be for /dev/[r]raid* !!! */
1463
1464 unit = raidPtr->raidid;
1465 rs = &raid_softc[unit];
1466
1467 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1468 * partition.. Need to make it absolute to the underlying device.. */
1469
1470 blocknum = bp->b_blkno;
1471 if (DISKPART(bp->b_dev) != RAW_PART) {
1472 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1473 blocknum += pp->p_offset;
1474 db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
1475 pp->p_offset));
1476 } else {
1477 db1_printf(("Is raw..\n"));
1478 }
1479 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));
1480
1481 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1482 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1483
1484 /* *THIS* is where we adjust what block we're going to... but DO NOT
1485 * TOUCH bp->b_blkno!!! */
1486 raid_addr = blocknum;
1487
1488 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1489 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1490 sum = raid_addr + num_blocks + pb;
1491 if (1 || rf_debugKernelAccess) {
1492 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1493 (int) raid_addr, (int) sum, (int) num_blocks,
1494 (int) pb, (int) bp->b_resid));
1495 }
1496 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1497 || (sum < num_blocks) || (sum < pb)) {
1498 bp->b_error = ENOSPC;
1499 bp->b_flags |= B_ERROR;
1500 bp->b_resid = bp->b_bcount;
1501 biodone(bp);
1502 return (bp->b_error);
1503 }
1504 /*
1505 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1506 */
1507
1508 if (bp->b_bcount & raidPtr->sectorMask) {
1509 bp->b_error = EINVAL;
1510 bp->b_flags |= B_ERROR;
1511 bp->b_resid = bp->b_bcount;
1512 biodone(bp);
1513 return (bp->b_error);
1514 }
1515 db1_printf(("Calling DoAccess..\n"));
1516
1517
1518 /* Put a throttle on the number of requests we handle simultanously */
1519
1520 RF_LOCK_MUTEX(raidPtr->mutex);
1521
1522 while(raidPtr->openings <= 0) {
1523 RF_UNLOCK_MUTEX(raidPtr->mutex);
1524 (void)tsleep(&raidPtr->openings, PRIBIO, "rfdwait", 0);
1525 RF_LOCK_MUTEX(raidPtr->mutex);
1526 }
1527 raidPtr->openings--;
1528
1529 RF_UNLOCK_MUTEX(raidPtr->mutex);
1530
1531 /*
1532 * Everything is async.
1533 */
1534 do_async = 1;
1535
1536 /* don't ever condition on bp->b_flags & B_WRITE. always condition on
1537 * B_READ instead */
1538 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1539 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1540 do_async, raid_addr, num_blocks,
1541 bp->b_un.b_addr,
1542 bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
1543 NULL, cbFunc, cbArg);
1544 #if 0
1545 db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
1546 bp->b_data, (int) bp->b_resid));
1547 #endif
1548
1549 return (retcode);
1550 }
1551 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1552
1553 int
1554 rf_DispatchKernelIO(queue, req)
1555 RF_DiskQueue_t *queue;
1556 RF_DiskQueueData_t *req;
1557 {
1558 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1559 struct buf *bp;
1560 struct raidbuf *raidbp = NULL;
1561 struct raid_softc *rs;
1562 int unit;
1563
1564 /* XXX along with the vnode, we also need the softc associated with
1565 * this device.. */
1566
1567 req->queue = queue;
1568
1569 unit = queue->raidPtr->raidid;
1570
1571 db1_printf(("DispatchKernelIO unit: %d\n", unit));
1572
1573 if (unit >= numraid) {
1574 printf("Invalid unit number: %d %d\n", unit, numraid);
1575 panic("Invalid Unit number in rf_DispatchKernelIO\n");
1576 }
1577 rs = &raid_softc[unit];
1578
1579 /* XXX is this the right place? */
1580 disk_busy(&rs->sc_dkdev);
1581
1582 bp = req->bp;
1583 #if 1
1584 /* XXX when there is a physical disk failure, someone is passing us a
1585 * buffer that contains old stuff!! Attempt to deal with this problem
1586 * without taking a performance hit... (not sure where the real bug
1587 * is. It's buried in RAIDframe somewhere) :-( GO ) */
1588
1589 if (bp->b_flags & B_ERROR) {
1590 bp->b_flags &= ~B_ERROR;
1591 }
1592 if (bp->b_error != 0) {
1593 bp->b_error = 0;
1594 }
1595 #endif
1596 raidbp = RAIDGETBUF(rs);
1597
1598 raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1599
1600 /*
1601 * context for raidiodone
1602 */
1603 raidbp->rf_obp = bp;
1604 raidbp->req = req;
1605
1606 switch (req->type) {
1607 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1608 /* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
1609 * queue->row, queue->col); */
1610 /* XXX need to do something extra here.. */
1611 /* I'm leaving this in, as I've never actually seen it used,
1612 * and I'd like folks to report it... GO */
1613 printf(("WAKEUP CALLED\n"));
1614 queue->numOutstanding++;
1615
1616 /* XXX need to glue the original buffer into this?? */
1617
1618 KernelWakeupFunc(&raidbp->rf_buf);
1619 break;
1620
1621 case RF_IO_TYPE_READ:
1622 case RF_IO_TYPE_WRITE:
1623
1624 if (req->tracerec) {
1625 RF_ETIMER_START(req->tracerec->timer);
1626 }
1627 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1628 op | bp->b_flags, queue->rf_cinfo->ci_dev,
1629 req->sectorOffset, req->numSector,
1630 req->buf, KernelWakeupFunc, (void *) req,
1631 queue->raidPtr->logBytesPerSector, req->b_proc);
1632
1633 if (rf_debugKernelAccess) {
1634 db1_printf(("dispatch: bp->b_blkno = %ld\n",
1635 (long) bp->b_blkno));
1636 }
1637 queue->numOutstanding++;
1638 queue->last_deq_sector = req->sectorOffset;
1639 /* acc wouldn't have been let in if there were any pending
1640 * reqs at any other priority */
1641 queue->curPriority = req->priority;
1642 /* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
1643 * req->type, queue->row, queue->col); */
1644
1645 db1_printf(("Going for %c to unit %d row %d col %d\n",
1646 req->type, unit, queue->row, queue->col));
1647 db1_printf(("sector %d count %d (%d bytes) %d\n",
1648 (int) req->sectorOffset, (int) req->numSector,
1649 (int) (req->numSector <<
1650 queue->raidPtr->logBytesPerSector),
1651 (int) queue->raidPtr->logBytesPerSector));
1652 if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1653 raidbp->rf_buf.b_vp->v_numoutput++;
1654 }
1655 VOP_STRATEGY(&raidbp->rf_buf);
1656
1657 break;
1658
1659 default:
1660 panic("bad req->type in rf_DispatchKernelIO");
1661 }
1662 db1_printf(("Exiting from DispatchKernelIO\n"));
1663 return (0);
1664 }
1665 /* this is the callback function associated with a I/O invoked from
1666 kernel code.
1667 */
1668 static void
1669 KernelWakeupFunc(vbp)
1670 struct buf *vbp;
1671 {
1672 RF_DiskQueueData_t *req = NULL;
1673 RF_DiskQueue_t *queue;
1674 struct raidbuf *raidbp = (struct raidbuf *) vbp;
1675 struct buf *bp;
1676 struct raid_softc *rs;
1677 int unit;
1678 register int s;
1679
1680 s = splbio(); /* XXX */
1681 db1_printf(("recovering the request queue:\n"));
1682 req = raidbp->req;
1683
1684 bp = raidbp->rf_obp;
1685 #if 0
1686 db1_printf(("bp=0x%x\n", bp));
1687 #endif
1688
1689 queue = (RF_DiskQueue_t *) req->queue;
1690
1691 if (raidbp->rf_buf.b_flags & B_ERROR) {
1692 #if 0
1693 printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
1694 #endif
1695 bp->b_flags |= B_ERROR;
1696 bp->b_error = raidbp->rf_buf.b_error ?
1697 raidbp->rf_buf.b_error : EIO;
1698 }
1699 #if 0
1700 db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
1701 db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
1702 db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
1703 db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
1704 #endif
1705
1706 /* XXX methinks this could be wrong... */
1707 #if 1
1708 bp->b_resid = raidbp->rf_buf.b_resid;
1709 #endif
1710
1711 if (req->tracerec) {
1712 RF_ETIMER_STOP(req->tracerec->timer);
1713 RF_ETIMER_EVAL(req->tracerec->timer);
1714 RF_LOCK_MUTEX(rf_tracing_mutex);
1715 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1716 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1717 req->tracerec->num_phys_ios++;
1718 RF_UNLOCK_MUTEX(rf_tracing_mutex);
1719 }
1720 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1721
1722 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1723
1724
1725 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1726 * ballistic, and mark the component as hosed... */
1727 #if 1
1728 if (bp->b_flags & B_ERROR) {
1729 /* Mark the disk as dead */
1730 /* but only mark it once... */
1731 if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1732 rf_ds_optimal) {
1733 printf("raid%d: IO Error. Marking %s as failed.\n",
1734 unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1735 queue->raidPtr->Disks[queue->row][queue->col].status =
1736 rf_ds_failed;
1737 queue->raidPtr->status[queue->row] = rf_rs_degraded;
1738 queue->raidPtr->numFailures++;
1739 /* XXX here we should bump the version number for each component, and write that data out */
1740 } else { /* Disk is already dead... */
1741 /* printf("Disk already marked as dead!\n"); */
1742 }
1743
1744 }
1745 #endif
1746
1747 rs = &raid_softc[unit];
1748 RAIDPUTBUF(rs, raidbp);
1749
1750
1751 if (bp->b_resid == 0) {
1752 db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
1753 unit, bp->b_resid, bp->b_bcount));
1754 /* XXX is this the right place for a disk_unbusy()??!??!?!? */
1755 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
1756 } else {
1757 db1_printf(("b_resid is still %ld\n", bp->b_resid));
1758 }
1759
1760 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1761 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1762 /* printf("Exiting KernelWakeupFunc\n"); */
1763
1764 splx(s); /* XXX */
1765 }
1766
1767
1768
1769 /*
1770 * initialize a buf structure for doing an I/O in the kernel.
1771 */
1772 static void
1773 InitBP(
1774 struct buf * bp,
1775 struct vnode * b_vp,
1776 unsigned rw_flag,
1777 dev_t dev,
1778 RF_SectorNum_t startSect,
1779 RF_SectorCount_t numSect,
1780 caddr_t buf,
1781 void (*cbFunc) (struct buf *),
1782 void *cbArg,
1783 int logBytesPerSector,
1784 struct proc * b_proc)
1785 {
1786 /* bp->b_flags = B_PHYS | rw_flag; */
1787 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1788 bp->b_bcount = numSect << logBytesPerSector;
1789 bp->b_bufsize = bp->b_bcount;
1790 bp->b_error = 0;
1791 bp->b_dev = dev;
1792 db1_printf(("bp->b_dev is %d\n", dev));
1793 bp->b_un.b_addr = buf;
1794 #if 0
1795 db1_printf(("bp->b_data=0x%x\n", bp->b_data));
1796 #endif
1797
1798 bp->b_blkno = startSect;
1799 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1800 db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
1801 if (bp->b_bcount == 0) {
1802 panic("bp->b_bcount is zero in InitBP!!\n");
1803 }
1804 bp->b_proc = b_proc;
1805 bp->b_iodone = cbFunc;
1806 bp->b_vp = b_vp;
1807
1808 }
/* Extras... */

/*
 * rpcc -- placeholder that always returns 0.
 *
 * XXX no clue what this is supposed to do; the guess is that it was
 * meant to read the CPU cycle counter.
 */
unsigned int
rpcc()
{
	unsigned int cycles = 0;

	return (cycles);
}

#if 0
/* Disabled stand-in for rf_GetSpareTableFromDaemon(); never compiled. */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int     retcode = 1;

	printf("This is supposed to do something useful!!\n");	/* XXX */

	return (retcode);
}
#endif
1831
1832 static void
1833 raidgetdefaultlabel(raidPtr, rs, lp)
1834 RF_Raid_t *raidPtr;
1835 struct raid_softc *rs;
1836 struct disklabel *lp;
1837 {
1838 db1_printf(("Building a default label...\n"));
1839 bzero(lp, sizeof(*lp));
1840
1841 /* fabricate a label... */
1842 lp->d_secperunit = raidPtr->totalSectors;
1843 lp->d_secsize = raidPtr->bytesPerSector;
1844 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1845 lp->d_ntracks = 1;
1846 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1847 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1848
1849 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1850 lp->d_type = DTYPE_RAID;
1851 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1852 lp->d_rpm = 3600;
1853 lp->d_interleave = 1;
1854 lp->d_flags = 0;
1855
1856 lp->d_partitions[RAW_PART].p_offset = 0;
1857 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1858 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1859 lp->d_npartitions = RAW_PART + 1;
1860
1861 lp->d_magic = DISKMAGIC;
1862 lp->d_magic2 = DISKMAGIC;
1863 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1864
1865 }
1866 /*
1867 * Read the disklabel from the raid device. If one is not present, fake one
1868 * up.
1869 */
1870 static void
1871 raidgetdisklabel(dev)
1872 dev_t dev;
1873 {
1874 int unit = raidunit(dev);
1875 struct raid_softc *rs = &raid_softc[unit];
1876 char *errstring;
1877 struct disklabel *lp = rs->sc_dkdev.dk_label;
1878 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1879 RF_Raid_t *raidPtr;
1880
1881 db1_printf(("Getting the disklabel...\n"));
1882
1883 bzero(clp, sizeof(*clp));
1884
1885 raidPtr = raidPtrs[unit];
1886
1887 raidgetdefaultlabel(raidPtr, rs, lp);
1888
1889 /*
1890 * Call the generic disklabel extraction routine.
1891 */
1892 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1893 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1894 if (errstring)
1895 raidmakedisklabel(rs);
1896 else {
1897 int i;
1898 struct partition *pp;
1899
1900 /*
1901 * Sanity check whether the found disklabel is valid.
1902 *
1903 * This is necessary since total size of the raid device
1904 * may vary when an interleave is changed even though exactly
1905 * same componets are used, and old disklabel may used
1906 * if that is found.
1907 */
1908 if (lp->d_secperunit != rs->sc_size)
1909 printf("WARNING: %s: "
1910 "total sector size in disklabel (%d) != "
1911 "the size of raid (%ld)\n", rs->sc_xname,
1912 lp->d_secperunit, (long) rs->sc_size);
1913 for (i = 0; i < lp->d_npartitions; i++) {
1914 pp = &lp->d_partitions[i];
1915 if (pp->p_offset + pp->p_size > rs->sc_size)
1916 printf("WARNING: %s: end of partition `%c' "
1917 "exceeds the size of raid (%ld)\n",
1918 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1919 }
1920 }
1921
1922 }
1923 /*
1924 * Take care of things one might want to take care of in the event
1925 * that a disklabel isn't present.
1926 */
1927 static void
1928 raidmakedisklabel(rs)
1929 struct raid_softc *rs;
1930 {
1931 struct disklabel *lp = rs->sc_dkdev.dk_label;
1932 db1_printf(("Making a label..\n"));
1933
1934 /*
1935 * For historical reasons, if there's no disklabel present
1936 * the raw partition must be marked FS_BSDFFS.
1937 */
1938
1939 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1940
1941 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1942
1943 lp->d_checksum = dkcksum(lp);
1944 }
1945 /*
1946 * Lookup the provided name in the filesystem. If the file exists,
1947 * is a valid block device, and isn't being used by anyone else,
1948 * set *vpp to the file's vnode.
1949 * You'll find the original of this in ccd.c
1950 */
1951 int
1952 raidlookup(path, p, vpp)
1953 char *path;
1954 struct proc *p;
1955 struct vnode **vpp; /* result */
1956 {
1957 struct nameidata nd;
1958 struct vnode *vp;
1959 struct vattr va;
1960 int error;
1961
1962 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1963 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1964 #ifdef DEBUG
1965 printf("RAIDframe: vn_open returned %d\n", error);
1966 #endif
1967 return (error);
1968 }
1969 vp = nd.ni_vp;
1970 if (vp->v_usecount > 1) {
1971 VOP_UNLOCK(vp, 0);
1972 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1973 return (EBUSY);
1974 }
1975 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1976 VOP_UNLOCK(vp, 0);
1977 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1978 return (error);
1979 }
1980 /* XXX: eventually we should handle VREG, too. */
1981 if (va.va_type != VBLK) {
1982 VOP_UNLOCK(vp, 0);
1983 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1984 return (ENOTBLK);
1985 }
1986 VOP_UNLOCK(vp, 0);
1987 *vpp = vp;
1988 return (0);
1989 }
1990 /*
1991 * Wait interruptibly for an exclusive lock.
1992 *
1993 * XXX
1994 * Several drivers do this; it should be abstracted and made MP-safe.
1995 * (Hmm... where have we seen this warning before :-> GO )
1996 */
1997 static int
1998 raidlock(rs)
1999 struct raid_softc *rs;
2000 {
2001 int error;
2002
2003 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2004 rs->sc_flags |= RAIDF_WANTED;
2005 if ((error =
2006 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2007 return (error);
2008 }
2009 rs->sc_flags |= RAIDF_LOCKED;
2010 return (0);
2011 }
2012 /*
2013 * Unlock and wake up any waiters.
2014 */
2015 static void
2016 raidunlock(rs)
2017 struct raid_softc *rs;
2018 {
2019
2020 rs->sc_flags &= ~RAIDF_LOCKED;
2021 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2022 rs->sc_flags &= ~RAIDF_WANTED;
2023 wakeup(rs);
2024 }
2025 }
2026
2027
2028 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2029 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2030
2031 int
2032 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2033 {
2034 RF_ComponentLabel_t component_label;
2035 raidread_component_label(dev, b_vp, &component_label);
2036 component_label.mod_counter = mod_counter;
2037 component_label.clean = RF_RAID_CLEAN;
2038 raidwrite_component_label(dev, b_vp, &component_label);
2039 return(0);
2040 }
2041
2042
2043 int
2044 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2045 {
2046 RF_ComponentLabel_t component_label;
2047 raidread_component_label(dev, b_vp, &component_label);
2048 component_label.mod_counter = mod_counter;
2049 component_label.clean = RF_RAID_DIRTY;
2050 raidwrite_component_label(dev, b_vp, &component_label);
2051 return(0);
2052 }
2053
2054 /* ARGSUSED */
2055 int
2056 raidread_component_label(dev, b_vp, component_label)
2057 dev_t dev;
2058 struct vnode *b_vp;
2059 RF_ComponentLabel_t *component_label;
2060 {
2061 struct buf *bp;
2062 int error;
2063
2064 /* XXX should probably ensure that we don't try to do this if
2065 someone has changed rf_protected_sectors. */
2066
2067 /* get a block of the appropriate size... */
2068 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2069 bp->b_dev = dev;
2070
2071 /* get our ducks in a row for the read */
2072 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2073 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2074 bp->b_flags = B_BUSY | B_READ;
2075 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2076
2077 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2078
2079 error = biowait(bp);
2080
2081 if (!error) {
2082 memcpy(component_label, bp->b_un.b_addr,
2083 sizeof(RF_ComponentLabel_t));
2084 #if 0
2085 printf("raidread_component_label: got component label:\n");
2086 printf("Version: %d\n",component_label->version);
2087 printf("Serial Number: %d\n",component_label->serial_number);
2088 printf("Mod counter: %d\n",component_label->mod_counter);
2089 printf("Row: %d\n", component_label->row);
2090 printf("Column: %d\n", component_label->column);
2091 printf("Num Rows: %d\n", component_label->num_rows);
2092 printf("Num Columns: %d\n", component_label->num_columns);
2093 printf("Clean: %d\n", component_label->clean);
2094 printf("Status: %d\n", component_label->status);
2095 #endif
2096 } else {
2097 printf("Failed to read RAID component label!\n");
2098 }
2099
2100 bp->b_flags = B_INVAL | B_AGE;
2101 brelse(bp);
2102 return(error);
2103 }
2104 /* ARGSUSED */
2105 int
2106 raidwrite_component_label(dev, b_vp, component_label)
2107 dev_t dev;
2108 struct vnode *b_vp;
2109 RF_ComponentLabel_t *component_label;
2110 {
2111 struct buf *bp;
2112 int error;
2113
2114 /* get a block of the appropriate size... */
2115 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2116 bp->b_dev = dev;
2117
2118 /* get our ducks in a row for the write */
2119 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2120 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2121 bp->b_flags = B_BUSY | B_WRITE;
2122 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2123
2124 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2125
2126 memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));
2127
2128 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2129 error = biowait(bp);
2130 bp->b_flags = B_INVAL | B_AGE;
2131 brelse(bp);
2132 if (error) {
2133 printf("Failed to write RAID component info!\n");
2134 }
2135
2136 return(error);
2137 }
2138
2139 void
2140 rf_markalldirty( raidPtr )
2141 RF_Raid_t *raidPtr;
2142 {
2143 RF_ComponentLabel_t c_label;
2144 int r,c;
2145
2146 raidPtr->mod_counter++;
2147 for (r = 0; r < raidPtr->numRow; r++) {
2148 for (c = 0; c < raidPtr->numCol; c++) {
2149 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2150 raidread_component_label(
2151 raidPtr->Disks[r][c].dev,
2152 raidPtr->raid_cinfo[r][c].ci_vp,
2153 &c_label);
2154 if (c_label.status == rf_ds_spared) {
2155 /* XXX do something special...
2156 but whatever you do, don't
2157 try to access it!! */
2158 } else {
2159 #if 0
2160 c_label.status =
2161 raidPtr->Disks[r][c].status;
2162 raidwrite_component_label(
2163 raidPtr->Disks[r][c].dev,
2164 raidPtr->raid_cinfo[r][c].ci_vp,
2165 &c_label);
2166 #endif
2167 raidmarkdirty(
2168 raidPtr->Disks[r][c].dev,
2169 raidPtr->raid_cinfo[r][c].ci_vp,
2170 raidPtr->mod_counter);
2171 }
2172 }
2173 }
2174 }
2175 /* printf("Component labels marked dirty.\n"); */
2176 #if 0
2177 for( c = 0; c < raidPtr->numSpare ; c++) {
2178 sparecol = raidPtr->numCol + c;
2179 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2180 /*
2181
2182 XXX this is where we get fancy and map this spare
2183 into it's correct spot in the array.
2184
2185 */
2186 /*
2187
2188 we claim this disk is "optimal" if it's
2189 rf_ds_used_spare, as that means it should be
2190 directly substitutable for the disk it replaced.
2191 We note that too...
2192
2193 */
2194
2195 for(i=0;i<raidPtr->numRow;i++) {
2196 for(j=0;j<raidPtr->numCol;j++) {
2197 if ((raidPtr->Disks[i][j].spareRow ==
2198 r) &&
2199 (raidPtr->Disks[i][j].spareCol ==
2200 sparecol)) {
2201 srow = r;
2202 scol = sparecol;
2203 break;
2204 }
2205 }
2206 }
2207
2208 raidread_component_label(
2209 raidPtr->Disks[r][sparecol].dev,
2210 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2211 &c_label);
2212 /* make sure status is noted */
2213 c_label.version = RF_COMPONENT_LABEL_VERSION;
2214 c_label.mod_counter = raidPtr->mod_counter;
2215 c_label.serial_number = raidPtr->serial_number;
2216 c_label.row = srow;
2217 c_label.column = scol;
2218 c_label.num_rows = raidPtr->numRow;
2219 c_label.num_columns = raidPtr->numCol;
2220 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2221 c_label.status = rf_ds_optimal;
2222 raidwrite_component_label(
2223 raidPtr->Disks[r][sparecol].dev,
2224 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2225 &c_label);
2226 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2227 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2228 }
2229 }
2230
2231 #endif
2232 }
2233
2234
2235 void
2236 rf_update_component_labels( raidPtr )
2237 RF_Raid_t *raidPtr;
2238 {
2239 RF_ComponentLabel_t c_label;
2240 int sparecol;
2241 int r,c;
2242 int i,j;
2243 int srow, scol;
2244
2245 srow = -1;
2246 scol = -1;
2247
2248 /* XXX should do extra checks to make sure things really are clean,
2249 rather than blindly setting the clean bit... */
2250
2251 raidPtr->mod_counter++;
2252
2253 for (r = 0; r < raidPtr->numRow; r++) {
2254 for (c = 0; c < raidPtr->numCol; c++) {
2255 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2256 raidread_component_label(
2257 raidPtr->Disks[r][c].dev,
2258 raidPtr->raid_cinfo[r][c].ci_vp,
2259 &c_label);
2260 /* make sure status is noted */
2261 c_label.status = rf_ds_optimal;
2262 raidwrite_component_label(
2263 raidPtr->Disks[r][c].dev,
2264 raidPtr->raid_cinfo[r][c].ci_vp,
2265 &c_label);
2266 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2267 raidmarkclean(
2268 raidPtr->Disks[r][c].dev,
2269 raidPtr->raid_cinfo[r][c].ci_vp,
2270 raidPtr->mod_counter);
2271 }
2272 }
2273 /* else we don't touch it.. */
2274 #if 0
2275 else if (raidPtr->Disks[r][c].status !=
2276 rf_ds_failed) {
2277 raidread_component_label(
2278 raidPtr->Disks[r][c].dev,
2279 raidPtr->raid_cinfo[r][c].ci_vp,
2280 &c_label);
2281 /* make sure status is noted */
2282 c_label.status =
2283 raidPtr->Disks[r][c].status;
2284 raidwrite_component_label(
2285 raidPtr->Disks[r][c].dev,
2286 raidPtr->raid_cinfo[r][c].ci_vp,
2287 &c_label);
2288 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2289 raidmarkclean(
2290 raidPtr->Disks[r][c].dev,
2291 raidPtr->raid_cinfo[r][c].ci_vp,
2292 raidPtr->mod_counter);
2293 }
2294 }
2295 #endif
2296 }
2297 }
2298
2299 for( c = 0; c < raidPtr->numSpare ; c++) {
2300 sparecol = raidPtr->numCol + c;
2301 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2302 /*
2303
2304 we claim this disk is "optimal" if it's
2305 rf_ds_used_spare, as that means it should be
2306 directly substitutable for the disk it replaced.
2307 We note that too...
2308
2309 */
2310
2311 for(i=0;i<raidPtr->numRow;i++) {
2312 for(j=0;j<raidPtr->numCol;j++) {
2313 if ((raidPtr->Disks[i][j].spareRow ==
2314 0) &&
2315 (raidPtr->Disks[i][j].spareCol ==
2316 sparecol)) {
2317 srow = i;
2318 scol = j;
2319 break;
2320 }
2321 }
2322 }
2323
2324 raidread_component_label(
2325 raidPtr->Disks[0][sparecol].dev,
2326 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2327 &c_label);
2328 /* make sure status is noted */
2329 c_label.version = RF_COMPONENT_LABEL_VERSION;
2330 c_label.mod_counter = raidPtr->mod_counter;
2331 c_label.serial_number = raidPtr->serial_number;
2332 c_label.row = srow;
2333 c_label.column = scol;
2334 c_label.num_rows = raidPtr->numRow;
2335 c_label.num_columns = raidPtr->numCol;
2336 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2337 c_label.status = rf_ds_optimal;
2338 raidwrite_component_label(
2339 raidPtr->Disks[0][sparecol].dev,
2340 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2341 &c_label);
2342 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2343 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2344 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2345 raidPtr->mod_counter);
2346 }
2347 }
2348 }
2349 /* printf("Component labels updated\n"); */
2350 }
2351