/*	$NetBSD: rf_netbsdkintf.c,v 1.16.2.6 1999/09/28 04:46:28 cgd Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
154 int rf_kdebug_level = 0;
155
156 #define RFK_BOOT_NONE 0
157 #define RFK_BOOT_GOOD 1
158 #define RFK_BOOT_BAD 2
159 static int rf_kbooted = RFK_BOOT_NONE;
160
161 #ifdef DEBUG
162 #define db0_printf(a) printf a
163 #define db_printf(a) if (rf_kdebug_level > 0) printf a
164 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
165 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
166 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
167 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
168 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
169 #else /* DEBUG */
170 #define db0_printf(a) printf a
171 #define db1_printf(a) { }
172 #define db2_printf(a) { }
173 #define db3_printf(a) { }
174 #define db4_printf(a) { }
175 #define db5_printf(a) { }
176 #endif /* DEBUG */
177
178 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
179
180 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
181
182 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
183 * spare table */
184 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
185 * installation process */
186
187 static struct rf_recon_req *recon_queue = NULL; /* used to communicate
188 * reconstruction
189 * requests */
190
191
192 decl_simple_lock_data(, recon_queue_mutex)
193 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
194 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
195
196 /* prototypes */
197 static void KernelWakeupFunc(struct buf * bp);
198 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
199 dev_t dev, RF_SectorNum_t startSect,
200 RF_SectorCount_t numSect, caddr_t buf,
201 void (*cbFunc) (struct buf *), void *cbArg,
202 int logBytesPerSector, struct proc * b_proc);
203
204 #define Dprintf0(s) if (rf_queueDebug) \
205 rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
206 #define Dprintf1(s,a) if (rf_queueDebug) \
207 rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
208 #define Dprintf2(s,a,b) if (rf_queueDebug) \
209 rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
210 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
211 rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
212
213 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
214 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
215
216 void raidattach __P((int));
217 int raidsize __P((dev_t));
218
219 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
220 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
221 static int raidinit __P((dev_t, RF_Raid_t *, int));
222
223 int raidopen __P((dev_t, int, int, struct proc *));
224 int raidclose __P((dev_t, int, int, struct proc *));
225 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
226 int raidwrite __P((dev_t, struct uio *, int));
227 int raidread __P((dev_t, struct uio *, int));
228 void raidstrategy __P((struct buf *));
229 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
230
231 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
232 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
233 void rf_update_component_labels( RF_Raid_t *);
/*
 * Pilfered from ccd.c
 */

/*
 * Per-component I/O descriptor: wraps the struct buf handed to a component
 * device so that completion (KernelWakeupFunc) can recover both the
 * original buf and the RAIDframe request it belongs to.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! (we cast
				 * the component buf back to a raidbuf) */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};

/* Allocate/release a raidbuf from the per-unit component buffer pool.
 * PR_NOWAIT: may return NULL; callers must check. */
#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
248
/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that...
*/

/*
 * Software state per RAID unit, parallel to raidPtrs[].  Holds the
 * generic-disk glue (disklabel, open masks) and the component buffer
 * pool; the RAIDframe state proper lives in raidPtrs[unit].
 */
struct raid_softc {
	int     sc_flags;	/* flags (RAIDF_* below) */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;	/* size of the raid device */
	dev_t   sc_dev;		/* our device.. */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */
267
268 #define raidunit(x) DISKUNIT(x)
269 static int numraid = 0;
270
271 /*
272 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
273 * Be aware that large numbers can allow the driver to consume a lot of
274 * kernel memory, especially on writes...
275 */
276
277 #ifndef RAIDOUTSTANDING
278 #define RAIDOUTSTANDING 10
279 #endif
280
281 #define RAIDLABELDEV(dev) \
282 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
283
284 /* declared here, and made public, for the benefit of KVM stuff.. */
285 struct raid_softc *raid_softc;
286
287 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
288 struct disklabel *));
289 static void raidgetdisklabel __P((dev_t));
290 static void raidmakedisklabel __P((struct raid_softc *));
291
292 static int raidlock __P((struct raid_softc *));
293 static void raidunlock __P((struct raid_softc *));
294 int raidlookup __P((char *, struct proc * p, struct vnode **));
295
296 static void rf_markalldirty __P((RF_Raid_t *));
297
298 void
299 raidattach(num)
300 int num;
301 {
302 int raidID;
303 int i, rc;
304
305 #ifdef DEBUG
306 printf("raidattach: Asked for %d units\n", num);
307 #endif
308
309 if (num <= 0) {
310 #ifdef DIAGNOSTIC
311 panic("raidattach: count <= 0");
312 #endif
313 return;
314 }
315 /* This is where all the initialization stuff gets done. */
316
317 /* Make some space for requested number of units... */
318
319 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
320 if (raidPtrs == NULL) {
321 panic("raidPtrs is NULL!!\n");
322 }
323
324 rc = rf_mutex_init(&rf_sparet_wait_mutex);
325 if (rc) {
326 RF_PANIC();
327 }
328
329 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
330 recon_queue = NULL;
331
332 for (i = 0; i < numraid; i++)
333 raidPtrs[i] = NULL;
334 rc = rf_BootRaidframe();
335 if (rc == 0)
336 printf("Kernelized RAIDframe activated\n");
337 else
338 panic("Serious error booting RAID!!\n");
339
340 rf_kbooted = RFK_BOOT_GOOD;
341
342 /* put together some datastructures like the CCD device does.. This
343 * lets us lock the device and what-not when it gets opened. */
344
345 raid_softc = (struct raid_softc *)
346 malloc(num * sizeof(struct raid_softc),
347 M_RAIDFRAME, M_NOWAIT);
348 if (raid_softc == NULL) {
349 printf("WARNING: no memory for RAIDframe driver\n");
350 return;
351 }
352 numraid = num;
353 bzero(raid_softc, num * sizeof(struct raid_softc));
354
355 for (raidID = 0; raidID < num; raidID++) {
356 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
357 (RF_Raid_t *));
358 if (raidPtrs[raidID] == NULL) {
359 printf("raidPtrs[%d] is NULL\n", raidID);
360 }
361 }
362 }
363
364
365 int
366 raidsize(dev)
367 dev_t dev;
368 {
369 struct raid_softc *rs;
370 struct disklabel *lp;
371 int part, unit, omask, size;
372
373 unit = raidunit(dev);
374 if (unit >= numraid)
375 return (-1);
376 rs = &raid_softc[unit];
377
378 if ((rs->sc_flags & RAIDF_INITED) == 0)
379 return (-1);
380
381 part = DISKPART(dev);
382 omask = rs->sc_dkdev.dk_openmask & (1 << part);
383 lp = rs->sc_dkdev.dk_label;
384
385 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
386 return (-1);
387
388 if (lp->d_partitions[part].p_fstype != FS_SWAP)
389 size = -1;
390 else
391 size = lp->d_partitions[part].p_size *
392 (lp->d_secsize / DEV_BSIZE);
393
394 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
395 return (-1);
396
397 return (size);
398
399 }
400
401 int
402 raiddump(dev, blkno, va, size)
403 dev_t dev;
404 daddr_t blkno;
405 caddr_t va;
406 size_t size;
407 {
408 /* Not implemented. */
409 return ENXIO;
410 }
/* ARGSUSED */
/*
 * raidopen: open entry point for both block and character devices.
 * Validates the unit and partition, records the open in the per-format
 * open masks, and on the very first open of a configured set marks all
 * component labels dirty (so an unclean shutdown is detectable).
 * Returns 0 or an errno.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t   dev;
	int     flags, fmt;
	struct proc *p;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* serialize against concurrent open/close/unconfigure */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));


	/* first opener of a configured set: (re)read the disklabel */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		/* non-raw opens require a configured set and a used,
		 * in-range partition in the label */
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * raidclose: close entry point.  Clears the partition's bit in the
 * per-format open mask; when the last partition closes on a configured
 * set, rewrites the component labels (marking the set clean).
 * Always returns 0 once the unit lock has been obtained.
 */
int
raidclose(dev, flags, fmt, p)
	dev_t   dev;
	int     flags, fmt;
	struct proc *p;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	int     error = 0;
	int     part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* serialize against concurrent open/close/unconfigure */
	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */
		rf_update_component_labels( raidPtrs[unit] );
	}

	raidunlock(rs);
	return (0);

}
537
538 void
539 raidstrategy(bp)
540 register struct buf *bp;
541 {
542 register int s;
543
544 unsigned int raidID = raidunit(bp->b_dev);
545 RF_Raid_t *raidPtr;
546 struct raid_softc *rs = &raid_softc[raidID];
547 struct disklabel *lp;
548 int wlabel;
549
550 #if 0
551 db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
552 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
553 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
554 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
555 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
556
557 if (bp->b_flags & B_READ)
558 db1_printf(("READ\n"));
559 else
560 db1_printf(("WRITE\n"));
561 #endif
562 if (rf_kbooted != RFK_BOOT_GOOD)
563 return;
564 if (raidID >= numraid || !raidPtrs[raidID]) {
565 bp->b_error = ENODEV;
566 bp->b_flags |= B_ERROR;
567 bp->b_resid = bp->b_bcount;
568 biodone(bp);
569 return;
570 }
571 raidPtr = raidPtrs[raidID];
572 if (!raidPtr->valid) {
573 bp->b_error = ENODEV;
574 bp->b_flags |= B_ERROR;
575 bp->b_resid = bp->b_bcount;
576 biodone(bp);
577 return;
578 }
579 if (bp->b_bcount == 0) {
580 db1_printf(("b_bcount is zero..\n"));
581 biodone(bp);
582 return;
583 }
584 lp = rs->sc_dkdev.dk_label;
585
586 /*
587 * Do bounds checking and adjust transfer. If there's an
588 * error, the bounds check will flag that for us.
589 */
590
591 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
592 if (DISKPART(bp->b_dev) != RAW_PART)
593 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
594 db1_printf(("Bounds check failed!!:%d %d\n",
595 (int) bp->b_blkno, (int) wlabel));
596 biodone(bp);
597 return;
598 }
599 s = splbio(); /* XXX Needed? */
600 db1_printf(("Beginning strategy...\n"));
601
602 bp->b_resid = 0;
603 bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
604 NULL, NULL, NULL);
605 if (bp->b_error) {
606 bp->b_flags |= B_ERROR;
607 db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
608 bp->b_error));
609 }
610 splx(s);
611 #if 0
612 db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
613 bp, bp->b_data,
614 (int) bp->b_bcount, (int) bp->b_resid));
615 #endif
616 }
617 /* ARGSUSED */
618 int
619 raidread(dev, uio, flags)
620 dev_t dev;
621 struct uio *uio;
622 int flags;
623 {
624 int unit = raidunit(dev);
625 struct raid_softc *rs;
626 int part;
627
628 if (unit >= numraid)
629 return (ENXIO);
630 rs = &raid_softc[unit];
631
632 if ((rs->sc_flags & RAIDF_INITED) == 0)
633 return (ENXIO);
634 part = DISKPART(dev);
635
636 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
637
638 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
639
640 }
641 /* ARGSUSED */
642 int
643 raidwrite(dev, uio, flags)
644 dev_t dev;
645 struct uio *uio;
646 int flags;
647 {
648 int unit = raidunit(dev);
649 struct raid_softc *rs;
650
651 if (unit >= numraid)
652 return (ENXIO);
653 rs = &raid_softc[unit];
654
655 if ((rs->sc_flags & RAIDF_INITED) == 0)
656 return (ENXIO);
657 db1_printf(("raidwrite\n"));
658 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
659
660 }
661
662 int
663 raidioctl(dev, cmd, data, flag, p)
664 dev_t dev;
665 u_long cmd;
666 caddr_t data;
667 int flag;
668 struct proc *p;
669 {
670 int unit = raidunit(dev);
671 int error = 0;
672 int part, pmask;
673 struct raid_softc *rs;
674 #if 0
675 int r, c;
676 #endif
677 /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
678
679 /* struct ccdbuf *cbp; */
680 /* struct raidbuf *raidbp; */
681 RF_Config_t *k_cfg, *u_cfg;
682 u_char *specific_buf;
683 int retcode = 0;
684 int row;
685 int column;
686 int s;
687 struct rf_recon_req *rrcopy, *rr;
688 RF_ComponentLabel_t *component_label;
689 RF_ComponentLabel_t ci_label;
690 RF_ComponentLabel_t **c_label_ptr;
691 RF_SingleComponent_t *sparePtr,*componentPtr;
692 RF_SingleComponent_t hot_spare;
693 RF_SingleComponent_t component;
694
695 if (unit >= numraid)
696 return (ENXIO);
697 rs = &raid_softc[unit];
698
699 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
700 (int) DISKPART(dev), (int) unit, (int) cmd));
701
702 /* Must be open for writes for these commands... */
703 switch (cmd) {
704 case DIOCSDINFO:
705 case DIOCWDINFO:
706 case DIOCWLABEL:
707 if ((flag & FWRITE) == 0)
708 return (EBADF);
709 }
710
711 /* Must be initialized for these... */
712 switch (cmd) {
713 case DIOCGDINFO:
714 case DIOCSDINFO:
715 case DIOCWDINFO:
716 case DIOCGPART:
717 case DIOCWLABEL:
718 case DIOCGDEFLABEL:
719 case RAIDFRAME_SHUTDOWN:
720 case RAIDFRAME_REWRITEPARITY:
721 case RAIDFRAME_GET_INFO:
722 case RAIDFRAME_RESET_ACCTOTALS:
723 case RAIDFRAME_GET_ACCTOTALS:
724 case RAIDFRAME_KEEP_ACCTOTALS:
725 case RAIDFRAME_GET_SIZE:
726 case RAIDFRAME_FAIL_DISK:
727 case RAIDFRAME_COPYBACK:
728 case RAIDFRAME_CHECKRECON:
729 case RAIDFRAME_GET_COMPONENT_LABEL:
730 case RAIDFRAME_SET_COMPONENT_LABEL:
731 case RAIDFRAME_ADD_HOT_SPARE:
732 case RAIDFRAME_REMOVE_HOT_SPARE:
733 case RAIDFRAME_INIT_LABELS:
734 case RAIDFRAME_REBUILD_IN_PLACE:
735 if ((rs->sc_flags & RAIDF_INITED) == 0)
736 return (ENXIO);
737 }
738
739 switch (cmd) {
740
741
742 /* configure the system */
743 case RAIDFRAME_CONFIGURE:
744
745 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
746 /* copy-in the configuration information */
747 /* data points to a pointer to the configuration structure */
748 u_cfg = *((RF_Config_t **) data);
749 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
750 if (k_cfg == NULL) {
751 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
752 return (ENOMEM);
753 }
754 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
755 sizeof(RF_Config_t));
756 if (retcode) {
757 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
758 retcode));
759 return (retcode);
760 }
761 /* allocate a buffer for the layout-specific data, and copy it
762 * in */
763 if (k_cfg->layoutSpecificSize) {
764 if (k_cfg->layoutSpecificSize > 10000) {
765 /* sanity check */
766 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
767 return (EINVAL);
768 }
769 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
770 (u_char *));
771 if (specific_buf == NULL) {
772 RF_Free(k_cfg, sizeof(RF_Config_t));
773 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
774 return (ENOMEM);
775 }
776 retcode = copyin(k_cfg->layoutSpecific,
777 (caddr_t) specific_buf,
778 k_cfg->layoutSpecificSize);
779 if (retcode) {
780 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
781 retcode));
782 return (retcode);
783 }
784 } else
785 specific_buf = NULL;
786 k_cfg->layoutSpecific = specific_buf;
787
788 /* should do some kind of sanity check on the configuration.
789 * Store the sum of all the bytes in the last byte? */
790
791 #if 0
792 db1_printf(("Considering configuring the system.:%d 0x%x\n",
793 unit, p));
794 #endif
795
796 /* We need the pointer to this a little deeper, so stash it
797 * here... */
798
799 raidPtrs[unit]->proc = p;
800
801 /* configure the system */
802
803 raidPtrs[unit]->raidid = unit;
804
805 retcode = rf_Configure(raidPtrs[unit], k_cfg);
806
807 /* allow this many simultaneous IO's to this RAID device */
808 raidPtrs[unit]->openings = RAIDOUTSTANDING;
809
810 if (retcode == 0) {
811 retcode = raidinit(dev, raidPtrs[unit], unit);
812 rf_markalldirty( raidPtrs[unit] );
813 }
814 /* free the buffers. No return code here. */
815 if (k_cfg->layoutSpecificSize) {
816 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
817 }
818 RF_Free(k_cfg, sizeof(RF_Config_t));
819
820 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
821 retcode));
822
823 return (retcode);
824
825 /* shutdown the system */
826 case RAIDFRAME_SHUTDOWN:
827
828 if ((error = raidlock(rs)) != 0)
829 return (error);
830
831 /*
832 * If somebody has a partition mounted, we shouldn't
833 * shutdown.
834 */
835
836 part = DISKPART(dev);
837 pmask = (1 << part);
838 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
839 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
840 (rs->sc_dkdev.dk_copenmask & pmask))) {
841 raidunlock(rs);
842 return (EBUSY);
843 }
844
845 if (rf_debugKernelAccess) {
846 printf("call shutdown\n");
847 }
848 raidPtrs[unit]->proc = p; /* XXX necessary evil */
849
850 retcode = rf_Shutdown(raidPtrs[unit]);
851
852 db1_printf(("Done main shutdown\n"));
853
854 pool_destroy(&rs->sc_cbufpool);
855 db1_printf(("Done freeing component buffer freelist\n"));
856
857 /* It's no longer initialized... */
858 rs->sc_flags &= ~RAIDF_INITED;
859
860 /* Detach the disk. */
861 disk_detach(&rs->sc_dkdev);
862
863 raidunlock(rs);
864
865 return (retcode);
866 case RAIDFRAME_GET_COMPONENT_LABEL:
867 c_label_ptr = (RF_ComponentLabel_t **) data;
868 /* need to read the component label for the disk indicated
869 by row,column in component_label
870 XXX need to sanity check these values!!!
871 */
872
873 /* For practice, let's get it directly fromdisk, rather
874 than from the in-core copy */
875 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
876 (RF_ComponentLabel_t *));
877 if (component_label == NULL)
878 return (ENOMEM);
879
880 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
881
882 retcode = copyin( *c_label_ptr, component_label,
883 sizeof(RF_ComponentLabel_t));
884
885 if (retcode) {
886 return(retcode);
887 }
888
889 row = component_label->row;
890 printf("Row: %d\n",row);
891 if (row > raidPtrs[unit]->numRow) {
892 row = 0; /* XXX */
893 }
894 column = component_label->column;
895 printf("Column: %d\n",column);
896 if (column > raidPtrs[unit]->numCol) {
897 column = 0; /* XXX */
898 }
899
900 raidread_component_label(
901 raidPtrs[unit]->Disks[row][column].dev,
902 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
903 component_label );
904
905 retcode = copyout((caddr_t) component_label,
906 (caddr_t) *c_label_ptr,
907 sizeof(RF_ComponentLabel_t));
908 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
909 return (retcode);
910
911 case RAIDFRAME_SET_COMPONENT_LABEL:
912 component_label = (RF_ComponentLabel_t *) data;
913
914 /* XXX check the label for valid stuff... */
915 /* Note that some things *should not* get modified --
916 the user should be re-initing the labels instead of
917 trying to patch things.
918 */
919
920 printf("Got component label:\n");
921 printf("Version: %d\n",component_label->version);
922 printf("Serial Number: %d\n",component_label->serial_number);
923 printf("Mod counter: %d\n",component_label->mod_counter);
924 printf("Row: %d\n", component_label->row);
925 printf("Column: %d\n", component_label->column);
926 printf("Num Rows: %d\n", component_label->num_rows);
927 printf("Num Columns: %d\n", component_label->num_columns);
928 printf("Clean: %d\n", component_label->clean);
929 printf("Status: %d\n", component_label->status);
930
931 row = component_label->row;
932 column = component_label->column;
933
934 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
935 (column < 0) || (column > raidPtrs[unit]->numCol)) {
936 return(EINVAL);
937 }
938
939 /* XXX this isn't allowed to do anything for now :-) */
940 #if 0
941 raidwrite_component_label(
942 raidPtrs[unit]->Disks[row][column].dev,
943 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
944 component_label );
945 #endif
946 return (0);
947
948 case RAIDFRAME_INIT_LABELS:
949 component_label = (RF_ComponentLabel_t *) data;
950 /*
951 we only want the serial number from
952 the above. We get all the rest of the information
953 from the config that was used to create this RAID
954 set.
955 */
956
957 raidPtrs[unit]->serial_number = component_label->serial_number;
958 /* current version number */
959 ci_label.version = RF_COMPONENT_LABEL_VERSION;
960 ci_label.serial_number = component_label->serial_number;
961 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
962 ci_label.num_rows = raidPtrs[unit]->numRow;
963 ci_label.num_columns = raidPtrs[unit]->numCol;
964 ci_label.clean = RF_RAID_DIRTY; /* not clean */
965 ci_label.status = rf_ds_optimal; /* "It's good!" */
966
967 for(row=0;row<raidPtrs[unit]->numRow;row++) {
968 ci_label.row = row;
969 for(column=0;column<raidPtrs[unit]->numCol;column++) {
970 ci_label.column = column;
971 raidwrite_component_label(
972 raidPtrs[unit]->Disks[row][column].dev,
973 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
974 &ci_label );
975 }
976 }
977
978 return (retcode);
979
980 /* initialize all parity */
981 case RAIDFRAME_REWRITEPARITY:
982
983 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
984 /* Parity for RAID 0 is trivially correct */
985 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
986 return(0);
987 }
988
989 /* borrow the thread of the requesting process */
990 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
991 retcode = rf_RewriteParity(raidPtrs[unit]);
992 /* return I/O Error if the parity rewrite fails */
993
994 if (retcode) {
995 retcode = EIO;
996 } else {
997 /* set the clean bit! If we shutdown correctly,
998 the clean bit on each component label will get
999 set */
1000 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
1001 }
1002 return (retcode);
1003
1004
1005 case RAIDFRAME_ADD_HOT_SPARE:
1006 sparePtr = (RF_SingleComponent_t *) data;
1007 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1008 printf("Adding spare\n");
1009 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1010 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
1011 return(retcode);
1012
1013 case RAIDFRAME_REMOVE_HOT_SPARE:
1014 return(retcode);
1015
1016 case RAIDFRAME_REBUILD_IN_PLACE:
1017
1018 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1019 /* Can't do this on a RAID 0!! */
1020 return(EINVAL);
1021 }
1022
1023 componentPtr = (RF_SingleComponent_t *) data;
1024 memcpy( &component, componentPtr,
1025 sizeof(RF_SingleComponent_t));
1026 row = component.row;
1027 column = component.column;
1028 printf("Rebuild: %d %d\n",row, column);
1029 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
1030 (column < 0) || (column > raidPtrs[unit]->numCol)) {
1031 return(EINVAL);
1032 }
1033 printf("Attempting a rebuild in place\n");
1034 s = splbio();
1035 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1036 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1037 splx(s);
1038 return(retcode);
1039
1040 /* issue a test-unit-ready through raidframe to the indicated
1041 * device */
1042 #if 0 /* XXX not supported yet (ever?) */
1043 case RAIDFRAME_TUR:
1044 /* debug only */
1045 retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
1046 return (retcode);
1047 #endif
1048 case RAIDFRAME_GET_INFO:
1049 {
1050 RF_Raid_t *raid = raidPtrs[unit];
1051 RF_DeviceConfig_t *cfg, **ucfgp;
1052 int i, j, d;
1053
1054 if (!raid->valid)
1055 return (ENODEV);
1056 ucfgp = (RF_DeviceConfig_t **) data;
1057 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1058 (RF_DeviceConfig_t *));
1059 if (cfg == NULL)
1060 return (ENOMEM);
1061 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1062 cfg->rows = raid->numRow;
1063 cfg->cols = raid->numCol;
1064 cfg->ndevs = raid->numRow * raid->numCol;
1065 if (cfg->ndevs >= RF_MAX_DISKS) {
1066 cfg->ndevs = 0;
1067 return (ENOMEM);
1068 }
1069 cfg->nspares = raid->numSpare;
1070 if (cfg->nspares >= RF_MAX_DISKS) {
1071 cfg->nspares = 0;
1072 return (ENOMEM);
1073 }
1074 cfg->maxqdepth = raid->maxQueueDepth;
1075 d = 0;
1076 for (i = 0; i < cfg->rows; i++) {
1077 for (j = 0; j < cfg->cols; j++) {
1078 cfg->devs[d] = raid->Disks[i][j];
1079 d++;
1080 }
1081 }
1082 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1083 cfg->spares[i] = raid->Disks[0][j];
1084 }
1085 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1086 sizeof(RF_DeviceConfig_t));
1087 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1088
1089 return (retcode);
1090 }
1091 break;
1092
1093 case RAIDFRAME_RESET_ACCTOTALS:
1094 {
1095 RF_Raid_t *raid = raidPtrs[unit];
1096
1097 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1098 return (0);
1099 }
1100 break;
1101
1102 case RAIDFRAME_GET_ACCTOTALS:
1103 {
1104 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1105 RF_Raid_t *raid = raidPtrs[unit];
1106
1107 *totals = raid->acc_totals;
1108 return (0);
1109 }
1110 break;
1111
1112 case RAIDFRAME_KEEP_ACCTOTALS:
1113 {
1114 RF_Raid_t *raid = raidPtrs[unit];
1115 int *keep = (int *) data;
1116
1117 raid->keep_acc_totals = *keep;
1118 return (0);
1119 }
1120 break;
1121
1122 case RAIDFRAME_GET_SIZE:
1123 *(int *) data = raidPtrs[unit]->totalSectors;
1124 return (0);
1125
1126 #define RAIDFRAME_RECON 1
1127 /* XXX The above should probably be set somewhere else!! GO */
1128 #if RAIDFRAME_RECON > 0
1129
1130 /* fail a disk & optionally start reconstruction */
1131 case RAIDFRAME_FAIL_DISK:
1132
1133 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1134 /* Can't do this on a RAID 0!! */
1135 return(EINVAL);
1136 }
1137
1138 rr = (struct rf_recon_req *) data;
1139
1140 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1141 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1142 return (EINVAL);
1143
1144 printf("raid%d: Failing the disk: row: %d col: %d\n",
1145 unit, rr->row, rr->col);
1146
1147 /* make a copy of the recon request so that we don't rely on
1148 * the user's buffer */
1149 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1150 bcopy(rr, rrcopy, sizeof(*rr));
1151 rrcopy->raidPtr = (void *) raidPtrs[unit];
1152
1153 LOCK_RECON_Q_MUTEX();
1154 rrcopy->next = recon_queue;
1155 recon_queue = rrcopy;
1156 wakeup(&recon_queue);
1157 UNLOCK_RECON_Q_MUTEX();
1158
1159 return (0);
1160
1161 /* invoke a copyback operation after recon on whatever disk
1162 * needs it, if any */
1163 case RAIDFRAME_COPYBACK:
1164
1165 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1166 /* This makes no sense on a RAID 0!! */
1167 return(EINVAL);
1168 }
1169
1170 /* borrow the current thread to get this done */
1171 raidPtrs[unit]->proc = p; /* ICK.. but needed :-p GO */
1172 s = splbio();
1173 rf_CopybackReconstructedData(raidPtrs[unit]);
1174 splx(s);
1175 return (0);
1176
1177 /* return the percentage completion of reconstruction */
1178 case RAIDFRAME_CHECKRECON:
1179 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1180 /* This makes no sense on a RAID 0 */
1181 return(EINVAL);
1182 }
1183
1184 row = *(int *) data;
1185 if (row < 0 || row >= raidPtrs[unit]->numRow)
1186 return (EINVAL);
1187 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1188 *(int *) data = 100;
1189 else
1190 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1191 return (0);
1192
1193 /* the sparetable daemon calls this to wait for the kernel to
1194 * need a spare table. this ioctl does not return until a
1195 * spare table is needed. XXX -- calling mpsleep here in the
1196 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1197 * -- I should either compute the spare table in the kernel,
1198 * or have a different -- XXX XXX -- interface (a different
1199 * character device) for delivering the table -- XXX */
1200 #if 0
1201 case RAIDFRAME_SPARET_WAIT:
1202 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1203 while (!rf_sparet_wait_queue)
1204 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1205 waitreq = rf_sparet_wait_queue;
1206 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1207 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1208
1209 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1210
1211 RF_Free(waitreq, sizeof(*waitreq));
1212 return (0);
1213
1214
1215 /* wakes up a process waiting on SPARET_WAIT and puts an error
1216 	 * code in it that will cause the daemon to exit */
1217 case RAIDFRAME_ABORT_SPARET_WAIT:
1218 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1219 waitreq->fcol = -1;
1220 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1221 waitreq->next = rf_sparet_wait_queue;
1222 rf_sparet_wait_queue = waitreq;
1223 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1224 wakeup(&rf_sparet_wait_queue);
1225 return (0);
1226
1227 /* used by the spare table daemon to deliver a spare table
1228 * into the kernel */
1229 case RAIDFRAME_SEND_SPARET:
1230
1231 /* install the spare table */
1232 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1233
1234 /* respond to the requestor. the return status of the spare
1235 * table installation is passed in the "fcol" field */
1236 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1237 waitreq->fcol = retcode;
1238 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1239 waitreq->next = rf_sparet_resp_queue;
1240 rf_sparet_resp_queue = waitreq;
1241 wakeup(&rf_sparet_resp_queue);
1242 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1243
1244 return (retcode);
1245 #endif
1246
1247
1248 #endif /* RAIDFRAME_RECON > 0 */
1249
1250 default:
1251 break; /* fall through to the os-specific code below */
1252
1253 }
1254
1255 if (!raidPtrs[unit]->valid)
1256 return (EINVAL);
1257
1258 /*
1259 * Add support for "regular" device ioctls here.
1260 */
1261
1262 switch (cmd) {
1263 case DIOCGDINFO:
1264 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1265 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1266 break;
1267
1268 case DIOCGPART:
1269 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1270 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1271 ((struct partinfo *) data)->part =
1272 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1273 break;
1274
1275 case DIOCWDINFO:
1276 db1_printf(("DIOCWDINFO\n"));
1277 case DIOCSDINFO:
1278 db1_printf(("DIOCSDINFO\n"));
1279 if ((error = raidlock(rs)) != 0)
1280 return (error);
1281
1282 rs->sc_flags |= RAIDF_LABELLING;
1283
1284 error = setdisklabel(rs->sc_dkdev.dk_label,
1285 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1286 if (error == 0) {
1287 if (cmd == DIOCWDINFO)
1288 error = writedisklabel(RAIDLABELDEV(dev),
1289 raidstrategy, rs->sc_dkdev.dk_label,
1290 rs->sc_dkdev.dk_cpulabel);
1291 }
1292 rs->sc_flags &= ~RAIDF_LABELLING;
1293
1294 raidunlock(rs);
1295
1296 if (error)
1297 return (error);
1298 break;
1299
1300 case DIOCWLABEL:
1301 db1_printf(("DIOCWLABEL\n"));
1302 if (*(int *) data != 0)
1303 rs->sc_flags |= RAIDF_WLABEL;
1304 else
1305 rs->sc_flags &= ~RAIDF_WLABEL;
1306 break;
1307
1308 case DIOCGDEFLABEL:
1309 db1_printf(("DIOCGDEFLABEL\n"));
1310 raidgetdefaultlabel(raidPtrs[unit], rs,
1311 (struct disklabel *) data);
1312 break;
1313
1314 default:
1315 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1316 }
1317 return (retcode);
1318
1319 }
1320
1321
1322 /* raidinit -- complete the rest of the initialization for the
1323 RAIDframe device. */
1324
1325
1326 static int
1327 raidinit(dev, raidPtr, unit)
1328 dev_t dev;
1329 RF_Raid_t *raidPtr;
1330 int unit;
1331 {
1332 int retcode;
1333 /* int ix; */
1334 /* struct raidbuf *raidbp; */
1335 struct raid_softc *rs;
1336
1337 retcode = 0;
1338
1339 rs = &raid_softc[unit];
1340 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1341 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1342
1343
1344 /* XXX should check return code first... */
1345 rs->sc_flags |= RAIDF_INITED;
1346
1347 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1348
1349 rs->sc_dkdev.dk_name = rs->sc_xname;
1350
1351 /* disk_attach actually creates space for the CPU disklabel, among
1352 * other things, so it's critical to call this *BEFORE* we try putzing
1353 * with disklabels. */
1354
1355 disk_attach(&rs->sc_dkdev);
1356
1357 /* XXX There may be a weird interaction here between this, and
1358 * protectedSectors, as used in RAIDframe. */
1359
1360 rs->sc_size = raidPtr->totalSectors;
1361 rs->sc_dev = dev;
1362
1363 return (retcode);
1364 }
1365
1366 /*
1367 * This kernel thread never exits. It is created once, and persists
1368 * until the system reboots.
1369 */
1370
void
rf_ReconKernelThread()
{
	/* Service loop for the disk-failure/reconstruction requests that
	 * the RAIDFRAME_FAIL_DISK ioctl links onto recon_queue. */
	struct rf_recon_req *req;
	int s;

	/* XXX not sure what spl() level we should be at here... probably
	 * splbio() */
	s = splbio();

	while (1) {
		/* grab the next reconstruction request from the queue */
		LOCK_RECON_Q_MUTEX();
		while (!recon_queue) {
			/* NOTE(review): the queue mutex is dropped before
			 * tsleep() with no interlock, so a wakeup() landing
			 * in that window could be missed -- presumably
			 * benign at splbio(), but verify. */
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO,
			    "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
		 * If flags specifies that we should start recon, this call
		 * will not return until reconstruction completes, fails,
		 * or is aborted.
		 */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

		/* the ioctl handler malloc'd this copy; it is ours to free */
		RF_Free(req, sizeof(*req));
	}
}
1405 /* wake up the daemon & tell it to get us a spare table
1406 * XXX
1407 * the entries in the queues should be tagged with the raidPtr
1408 * so that in the extremely rare case that two recons happen at once,
1409 	 * we know for which device we're requesting a spare table
1410 * XXX
1411 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	/* Hand `req' to the user-level sparetable daemon (which blocks in
	 * RAIDFRAME_SPARET_WAIT) and sleep until it posts a response on
	 * rf_sparet_resp_queue.  Returns the daemon's status code. */
	int retcode;

	/* publish the request and wake the daemon */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
#if 0
		mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
		    (void *) simple_lock_addr(rf_sparet_wait_mutex),
		    MS_LOCK_SIMPLE);
#endif
	}
	/* dequeue the daemon's reply (allocated by RAIDFRAME_SEND_SPARET) */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* installation status is passed back in the "fcol" field */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1442 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1443 * bp & passes it down.
1444 * any calls originating in the kernel must use non-blocking I/O
1445 * do some extra sanity checking to return "appropriate" error values for
1446 * certain conditions (to make some standard utilities work)
1447 */
int
rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
	RF_Raid_t *raidPtr;
	struct buf *bp;
	RF_RaidAccessFlags_t flags;
	void (*cbFunc) (struct buf *);
	void *cbArg;
{
	/* Translate a kernel buf into a (non-blocking) rf_DoAccess()
	 * call: make the block number partition-absolute, range- and
	 * alignment-check it, throttle on raidPtr->openings, then issue
	 * the access with cbFunc as the completion callback. */
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;

	/* XXX The dev_t used here should be for /dev/[r]raid* !!! */

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Ok, for the bp we have here, bp->b_blkno is relative to the
	 * partition.. Need to make it absolute to the underlying device.. */

	blocknum = bp->b_blkno;
	if (DISKPART(bp->b_dev) != RAW_PART) {
		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
		blocknum += pp->p_offset;
		db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
			pp->p_offset));
	} else {
		db1_printf(("Is raw..\n"));
	}
	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to... but DO NOT
	 * TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/* pb is 1 if the request ends in a partial sector; the (sum < x)
	 * terms below catch unsigned wrap-around in the addition */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			(int) raid_addr, (int) sum, (int) num_blocks,
			(int) pb, (int) bp->b_resid));
	}
	/* reject accesses that run past the end of the array */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		bp->b_error = ENOSPC;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* reject requests that are not a whole number of sectors */
	if (bp->b_bcount & raidPtr->sectorMask) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	db1_printf(("Calling DoAccess..\n"));


	/* Put a throttle on the number of requests we handle simultaneously */

	RF_LOCK_MUTEX(raidPtr->mutex);

	/* sleep until an opening frees up (KernelWakeupFunc path wakes us) */
	while(raidPtr->openings <= 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		(void)tsleep(&raidPtr->openings, PRIBIO, "rfdwait", 0);
		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	raidPtr->openings--;

	RF_UNLOCK_MUTEX(raidPtr->mutex);

	/*
	 * Everything is async.
	 */
	do_async = 1;

	/* don't ever condition on bp->b_flags & B_WRITE. always condition on
	 * B_READ instead */
	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
	    do_async, raid_addr, num_blocks,
	    bp->b_un.b_addr,
	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
	    NULL, cbFunc, cbArg);
#if 0
	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
		bp->b_data, (int) bp->b_resid));
#endif

	return (retcode);
}
1554 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1555
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	/* Issue one queued component I/O: wrap the request in a raidbuf
	 * (completion context for KernelWakeupFunc) and either fake the
	 * completion (NOP) or send it down via VOP_STRATEGY. */
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	/* clear any stale error state left in the incoming buffer */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* fake an immediate completion -- no real I/O is done */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		/* start timing the disk wait for the access trace record */
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must account themselves on the vnode first */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
1668 /* this is the callback function associated with a I/O invoked from
1669 kernel code.
1670 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	/* b_iodone callback for component I/Os issued by
	 * rf_DispatchKernelIO: propagate status into the original buf,
	 * fail the component on error, release the raidbuf, and notify
	 * the disk-queue layer. */
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate a component-level error into the original buffer */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* charge the physical I/O time to the access trace record */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* return the shadow raidbuf to the per-unit pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* hand completion (and error status) back to the queueing layer */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
1769
1770
1771
1772 /*
1773 * initialize a buf structure for doing an I/O in the kernel.
1774 */
1775 static void
1776 InitBP(
1777 struct buf * bp,
1778 struct vnode * b_vp,
1779 unsigned rw_flag,
1780 dev_t dev,
1781 RF_SectorNum_t startSect,
1782 RF_SectorCount_t numSect,
1783 caddr_t buf,
1784 void (*cbFunc) (struct buf *),
1785 void *cbArg,
1786 int logBytesPerSector,
1787 struct proc * b_proc)
1788 {
1789 /* bp->b_flags = B_PHYS | rw_flag; */
1790 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1791 bp->b_bcount = numSect << logBytesPerSector;
1792 bp->b_bufsize = bp->b_bcount;
1793 bp->b_error = 0;
1794 bp->b_dev = dev;
1795 db1_printf(("bp->b_dev is %d\n", dev));
1796 bp->b_un.b_addr = buf;
1797 #if 0
1798 db1_printf(("bp->b_data=0x%x\n", bp->b_data));
1799 #endif
1800
1801 bp->b_blkno = startSect;
1802 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1803 db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
1804 if (bp->b_bcount == 0) {
1805 panic("bp->b_bcount is zero in InitBP!!\n");
1806 }
1807 bp->b_proc = b_proc;
1808 bp->b_iodone = cbFunc;
1809 bp->b_vp = b_vp;
1810
1811 }
1812 /* Extras... */
1813
unsigned int
rpcc()
{
	/* XXX no clue what this is supposed to do.. my guess is that it's
	 * supposed to read the CPU cycle counter...  On this port it is
	 * simply a stub that always reports zero. */
	return (0);
}
1822 #if 0
1823 int
1824 rf_GetSpareTableFromDaemon(req)
1825 RF_SparetWait_t *req;
1826 {
1827 int retcode = 1;
1828 printf("This is supposed to do something useful!!\n"); /* XXX */
1829
1830 return (retcode);
1831
1832 }
1833 #endif
1834
1835 static void
1836 raidgetdefaultlabel(raidPtr, rs, lp)
1837 RF_Raid_t *raidPtr;
1838 struct raid_softc *rs;
1839 struct disklabel *lp;
1840 {
1841 db1_printf(("Building a default label...\n"));
1842 bzero(lp, sizeof(*lp));
1843
1844 /* fabricate a label... */
1845 lp->d_secperunit = raidPtr->totalSectors;
1846 lp->d_secsize = raidPtr->bytesPerSector;
1847 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1848 lp->d_ntracks = 1;
1849 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1850 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1851
1852 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1853 lp->d_type = DTYPE_RAID;
1854 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1855 lp->d_rpm = 3600;
1856 lp->d_interleave = 1;
1857 lp->d_flags = 0;
1858
1859 lp->d_partitions[RAW_PART].p_offset = 0;
1860 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1861 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1862 lp->d_npartitions = RAW_PART + 1;
1863
1864 lp->d_magic = DISKMAGIC;
1865 lp->d_magic2 = DISKMAGIC;
1866 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1867
1868 }
1869 /*
1870 * Read the disklabel from the raid device. If one is not present, fake one
1871 * up.
1872 */
static void
raidgetdisklabel(dev)
	dev_t dev;
{
	/* Load the array's disklabel: start from a fabricated default so
	 * every field is sane, then overlay the on-disk label if one can
	 * be read, sanity-checking its sizes against the array. */
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and an old disklabel may be used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
1926 /*
1927 * Take care of things one might want to take care of in the event
1928 * that a disklabel isn't present.
1929 */
1930 static void
1931 raidmakedisklabel(rs)
1932 struct raid_softc *rs;
1933 {
1934 struct disklabel *lp = rs->sc_dkdev.dk_label;
1935 db1_printf(("Making a label..\n"));
1936
1937 /*
1938 * For historical reasons, if there's no disklabel present
1939 * the raw partition must be marked FS_BSDFFS.
1940 */
1941
1942 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1943
1944 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1945
1946 lp->d_checksum = dkcksum(lp);
1947 }
1948 /*
1949 * Lookup the provided name in the filesystem. If the file exists,
1950 * is a valid block device, and isn't being used by anyone else,
1951 * set *vpp to the file's vnode.
1952 * You'll find the original of this in ccd.c
1953 */
1954 int
1955 raidlookup(path, p, vpp)
1956 char *path;
1957 struct proc *p;
1958 struct vnode **vpp; /* result */
1959 {
1960 struct nameidata nd;
1961 struct vnode *vp;
1962 struct vattr va;
1963 int error;
1964
1965 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1966 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1967 #ifdef DEBUG
1968 printf("RAIDframe: vn_open returned %d\n", error);
1969 #endif
1970 return (error);
1971 }
1972 vp = nd.ni_vp;
1973 if (vp->v_usecount > 1) {
1974 VOP_UNLOCK(vp, 0);
1975 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1976 return (EBUSY);
1977 }
1978 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1979 VOP_UNLOCK(vp, 0);
1980 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1981 return (error);
1982 }
1983 /* XXX: eventually we should handle VREG, too. */
1984 if (va.va_type != VBLK) {
1985 VOP_UNLOCK(vp, 0);
1986 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1987 return (ENOTBLK);
1988 }
1989 VOP_UNLOCK(vp, 0);
1990 *vpp = vp;
1991 return (0);
1992 }
1993 /*
1994 * Wait interruptibly for an exclusive lock.
1995 *
1996 * XXX
1997 * Several drivers do this; it should be abstracted and made MP-safe.
1998 * (Hmm... where have we seen this warning before :-> GO )
1999 */
2000 static int
2001 raidlock(rs)
2002 struct raid_softc *rs;
2003 {
2004 int error;
2005
2006 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2007 rs->sc_flags |= RAIDF_WANTED;
2008 if ((error =
2009 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2010 return (error);
2011 }
2012 rs->sc_flags |= RAIDF_LOCKED;
2013 return (0);
2014 }
2015 /*
2016 * Unlock and wake up any waiters.
2017 */
2018 static void
2019 raidunlock(rs)
2020 struct raid_softc *rs;
2021 {
2022
2023 rs->sc_flags &= ~RAIDF_LOCKED;
2024 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2025 rs->sc_flags &= ~RAIDF_WANTED;
2026 wakeup(rs);
2027 }
2028 }
2029
2030
2031 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2032 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2033
2034 int
2035 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2036 {
2037 RF_ComponentLabel_t component_label;
2038 raidread_component_label(dev, b_vp, &component_label);
2039 component_label.mod_counter = mod_counter;
2040 component_label.clean = RF_RAID_CLEAN;
2041 raidwrite_component_label(dev, b_vp, &component_label);
2042 return(0);
2043 }
2044
2045
2046 int
2047 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2048 {
2049 RF_ComponentLabel_t component_label;
2050 raidread_component_label(dev, b_vp, &component_label);
2051 component_label.mod_counter = mod_counter;
2052 component_label.clean = RF_RAID_DIRTY;
2053 raidwrite_component_label(dev, b_vp, &component_label);
2054 return(0);
2055 }
2056
2057 /* ARGSUSED */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	/* Read the RAIDframe component label (RF_COMPONENT_INFO_SIZE bytes
	 * at byte offset RF_COMPONENT_INFO_OFFSET) from the component's
	 * block device into *component_label.  Returns the biowait()
	 * error, 0 on success. */
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_resid is normally byte-valued; this stores a
	 * sector count -- presumably harmless for this private buf, but
	 * verify. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* issue the read directly via the component's block driver */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* mark the buffer stale so the data is not kept in the cache */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
2107 /* ARGSUSED */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	/* Write *component_label (zero-padded to RF_COMPONENT_INFO_SIZE
	 * bytes) to the component label area of the component's block
	 * device.  Returns the biowait() error, 0 on success. */
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	/* NOTE(review): b_resid is normally byte-valued; this stores a
	 * sector count -- presumably harmless for this private buf, but
	 * verify. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero the whole label area, then drop the label at its start */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* issue the write directly via the component's block driver */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* mark the buffer stale so the data is not kept in the cache */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2141
2142 void
2143 rf_markalldirty( raidPtr )
2144 RF_Raid_t *raidPtr;
2145 {
2146 RF_ComponentLabel_t c_label;
2147 int r,c;
2148
2149 raidPtr->mod_counter++;
2150 for (r = 0; r < raidPtr->numRow; r++) {
2151 for (c = 0; c < raidPtr->numCol; c++) {
2152 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2153 raidread_component_label(
2154 raidPtr->Disks[r][c].dev,
2155 raidPtr->raid_cinfo[r][c].ci_vp,
2156 &c_label);
2157 if (c_label.status == rf_ds_spared) {
2158 /* XXX do something special...
2159 but whatever you do, don't
2160 try to access it!! */
2161 } else {
2162 #if 0
2163 c_label.status =
2164 raidPtr->Disks[r][c].status;
2165 raidwrite_component_label(
2166 raidPtr->Disks[r][c].dev,
2167 raidPtr->raid_cinfo[r][c].ci_vp,
2168 &c_label);
2169 #endif
2170 raidmarkdirty(
2171 raidPtr->Disks[r][c].dev,
2172 raidPtr->raid_cinfo[r][c].ci_vp,
2173 raidPtr->mod_counter);
2174 }
2175 }
2176 }
2177 }
2178 /* printf("Component labels marked dirty.\n"); */
2179 #if 0
2180 for( c = 0; c < raidPtr->numSpare ; c++) {
2181 sparecol = raidPtr->numCol + c;
2182 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2183 /*
2184
2185 XXX this is where we get fancy and map this spare
2186 into it's correct spot in the array.
2187
2188 */
2189 /*
2190
2191 we claim this disk is "optimal" if it's
2192 rf_ds_used_spare, as that means it should be
2193 directly substitutable for the disk it replaced.
2194 We note that too...
2195
2196 */
2197
2198 for(i=0;i<raidPtr->numRow;i++) {
2199 for(j=0;j<raidPtr->numCol;j++) {
2200 if ((raidPtr->Disks[i][j].spareRow ==
2201 r) &&
2202 (raidPtr->Disks[i][j].spareCol ==
2203 sparecol)) {
2204 srow = r;
2205 scol = sparecol;
2206 break;
2207 }
2208 }
2209 }
2210
2211 raidread_component_label(
2212 raidPtr->Disks[r][sparecol].dev,
2213 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2214 &c_label);
2215 /* make sure status is noted */
2216 c_label.version = RF_COMPONENT_LABEL_VERSION;
2217 c_label.mod_counter = raidPtr->mod_counter;
2218 c_label.serial_number = raidPtr->serial_number;
2219 c_label.row = srow;
2220 c_label.column = scol;
2221 c_label.num_rows = raidPtr->numRow;
2222 c_label.num_columns = raidPtr->numCol;
2223 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2224 c_label.status = rf_ds_optimal;
2225 raidwrite_component_label(
2226 raidPtr->Disks[r][sparecol].dev,
2227 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2228 &c_label);
2229 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2230 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2231 }
2232 }
2233
2234 #endif
2235 }
2236
2237
/*
 * Rewrite the component labels of all working components to reflect
 * the current state of the array.  Bumps the array's mod counter,
 * then:
 *
 *   - every rf_ds_optimal component has its label re-read, its status
 *     forced to rf_ds_optimal, and the label written back;
 *   - every in-use spare (rf_ds_used_spare) is relabeled as an optimal
 *     component at the (row, column) of the disk it replaced, since a
 *     used spare is directly substitutable for that disk.
 *
 * If parity is known good (RF_RAID_CLEAN), each component touched is
 * additionally marked clean via raidmarkclean().
 */
void
rf_update_component_labels( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	/* (srow, scol) stay -1 if the search loop below finds no match */
	srow = -1;
	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				/* read-modify-write so label fields not
				   set here keep their on-disk values */
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status = rf_ds_optimal;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
			/* else we don't touch it.. */
#if 0
			else if (raidPtr->Disks[r][c].status !=
				 rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				/* make sure status is noted */
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					      raidPtr->Disks[r][c].dev,
					      raidPtr->raid_cinfo[r][c].ci_vp,
					      raidPtr->mod_counter);
				}
			}
#endif
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* NOTE(review): spares are assumed to live on row 0 --
		   confirm against the rest of the driver */
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			*/

			/* find the (row, column) this spare stands in for */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = i;
						scol = j;
						/* NOTE(review): this break
						   only exits the inner j
						   loop; the outer i loop
						   keeps scanning */
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[0][sparecol].dev,
				      raidPtr->raid_cinfo[0][sparecol].ci_vp,
				      &c_label);
			if (raidPtr->parity_good == RF_RAID_CLEAN) {
				raidmarkclean( raidPtr->Disks[0][sparecol].dev,
					      raidPtr->raid_cinfo[0][sparecol].ci_vp,
					      raidPtr->mod_counter);
			}
		}
	}
	/* printf("Component labels updated\n"); */
}
2354