rf_netbsdkintf.c revision 1.8 1 /* $NetBSD: rf_netbsdkintf.c,v 1.8 1999/01/26 02:33:59 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
/* Debug verbosity knob: higher values enable the higher-numbered
 * dbN_printf() levels defined below. */
int rf_kdebug_level = 0;

/* States for rf_kbooted: RAIDframe core not yet booted, booted OK,
 * or boot failed. */
#define RFK_BOOT_NONE 0
#define RFK_BOOT_GOOD 1
#define RFK_BOOT_BAD  2
static int rf_kbooted = RFK_BOOT_NONE;

/* Leveled debug printf macros; db0_printf() always prints, the rest
 * are gated on rf_kdebug_level (and compiled out entirely when DEBUG
 * is not defined). */
#ifdef DEBUG
#define db0_printf(a) printf a
#define db_printf(a) if (rf_kdebug_level > 0) printf a
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#define db2_printf(a) if (rf_kdebug_level > 1) printf a
#define db3_printf(a) if (rf_kdebug_level > 2) printf a
#define db4_printf(a) if (rf_kdebug_level > 3) printf a
#define db5_printf(a) if (rf_kdebug_level > 4) printf a
#else /* DEBUG */
#define db0_printf(a) printf a
#define db1_printf(a) { }
#define db2_printf(a) { }
#define db3_printf(a) { }
#define db4_printf(a) { }
#define db5_printf(a) { }
#endif /* DEBUG */

static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

/* Count of in-flight test accesses; shutdown is refused while nonzero. */
static int rf_pending_testaccs;

RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
RF_DECLARE_STATIC_MUTEX(rf_async_done_q_mutex)
static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from installation process */
static struct rf_test_acc *rf_async_done_qh, *rf_async_done_qt;

/* Singly-linked list used to hand reconstruction requests to the
 * reconstruction thread; protected by recon_queue_mutex. */
static struct rf_recon_req *recon_queue = NULL;	/* used to communicate reconstruction requests */


decl_simple_lock_data(,recon_queue_mutex)


#define LOCK_RECON_Q_MUTEX()   simple_lock(&recon_queue_mutex)
#define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
/* prototypes */
static void KernelWakeupFunc(struct buf *bp);
static void InitBP(struct buf *bp, struct vnode *, unsigned rw_flag, dev_t dev,
		RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t buf,
		void (*cbFunc)(struct buf *), void *cbArg, int logBytesPerSector,
		struct proc *b_proc);

/* Queue-debug printfs, gated on rf_queueDebug. */
#define Dprintf0(s)       if (rf_queueDebug) rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf1(s,a)     if (rf_queueDebug) rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf2(s,a,b)   if (rf_queueDebug) rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf3(s,a,b,c) if (rf_queueDebug) rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)


/* this is so that we can compile under 2.0 as well as 3.2 */
#ifndef proc_to_task
#define proc_to_task(x) ((x)->task)
#endif /* !proc_to_task */

/* cdevsw/bdevsw entry points and helpers (old-style __P prototypes). */
void raidattach __P((int));
int raidsize __P((dev_t));

void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
void rf_CopybackReconstructedData(RF_Raid_t *raidPtr);
static int raidinit __P((dev_t,RF_Raid_t *,int));

int raidopen __P((dev_t, int, int, struct proc *));
int raidclose __P((dev_t, int, int, struct proc *));
int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
int raidwrite __P((dev_t, struct uio *, int));
int raidread __P((dev_t, struct uio *, int));
void raidstrategy __P((struct buf *));
int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
229
/*
 * Pilfered from ccd.c
 */

/* Per-component I/O descriptor: wraps the buf sent to a component disk
 * and remembers the original buf and disk-queue request it serves. */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int rf_flags;		/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};


/* Allocate/release a raidbuf from the per-unit component buffer pool;
 * PR_NOWAIT means RAIDGETBUF may return NULL under memory pressure. */
#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
244
/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that... */

/* Per-unit driver state, paralleling raidPtrs[]: open/lock flags, the
 * generic disk glue, and the component buffer pool. */
struct raid_softc {
	int sc_unit;		/* logical unit number */
	int sc_flags;		/* flags */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	dev_t sc_dev;		/* our device..*/
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;/* component buffer pool */
};

/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

#define raidunit(x) DISKUNIT(x)
/* Number of units allocated by raidattach(); 0 until attach runs. */
static int numraid=0;

/* Device number of the raw partition of the same unit, used for
 * reading/writing the on-disk label. */
#define RAIDLABELDEV(dev) \
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *, struct disklabel *));
static void raidgetdisklabel __P((dev_t));
static void raidmakedisklabel __P((struct raid_softc *));

static int raidlock __P((struct raid_softc *));
static void raidunlock __P((struct raid_softc *));
int raidlookup __P((char *, struct proc *p, struct vnode **));
283
/*
 * raidattach: pseudo-device attach entry point.
 *
 * Boots the RAIDframe core via rf_boot(), then allocates and zeroes
 * the per-unit descriptor arrays (raidPtrs[] and raid_softc[]) for
 * `num' units.  Panics on boot failure or on a NULL raidPtrs array;
 * a failed raid_softc allocation only warns and returns, leaving
 * numraid at 0 so the other entry points reject all units.
 */
void
raidattach(num)
	int num;
{
	int raidID;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n",num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/*
	   This is where all the initialization stuff gets done.
	*/

	/* Make some space for requested number of units... */

	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!\n");
	}



	rf_kbooted = rf_boot();
	if (rf_kbooted) {
		panic("Serious error booting RAID!!\n");
	}

	rf_kbooted = RFK_BOOT_GOOD;

	/*
	   put together some datastructures like the CCD device does..
	   This lets us lock the device and what-not when it gets opened.
	*/

	raid_softc = (struct raid_softc *)
		malloc(num * sizeof(struct raid_softc),
		       M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}
	numraid = num;
	bzero(raid_softc, num * sizeof(struct raid_softc));

	/* Pre-allocate one RF_Raid_t per unit; a NULL slot here is only
	   reported, and is caught again by raidstrategy/raidioctl. */
	for(raidID=0;raidID < num;raidID++) {
		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID]==NULL) {
			printf("raidPtrs[%d] is NULL\n",raidID);
		}
	}
}
343
344
345 int
346 raidsize(dev)
347 dev_t dev;
348 {
349 struct raid_softc *rs;
350 struct disklabel *lp;
351 int part, unit, omask, size;
352
353 unit = raidunit(dev);
354 if (unit >= numraid)
355 return (-1);
356 rs = &raid_softc[unit];
357
358 if ((rs->sc_flags & RAIDF_INITED) == 0)
359 return (-1);
360
361 part = DISKPART(dev);
362 omask = rs->sc_dkdev.dk_openmask & (1 << part);
363 lp = rs->sc_dkdev.dk_label;
364
365 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
366 return (-1);
367
368 if (lp->d_partitions[part].p_fstype != FS_SWAP)
369 size = -1;
370 else
371 size = lp->d_partitions[part].p_size *
372 (lp->d_secsize / DEV_BSIZE);
373
374 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
375 return (-1);
376
377 return (size);
378
379 }
380
381 int
382 raiddump(dev, blkno, va, size)
383 dev_t dev;
384 daddr_t blkno;
385 caddr_t va;
386 size_t size;
387 {
388 /* Not implemented. */
389 return ENXIO;
390 }
391
392 /* ARGSUSED */
/* ARGSUSED */
/*
 * raidopen: device open entry point.
 *
 * Re-boots the RAIDframe core if it has never booted, validates the
 * unit and partition, (re)reads the disklabel on first open of a
 * configured unit, and records the open in the per-format openmasks
 * so the unit cannot be unconfigured while open.  The whole body runs
 * under the per-unit raidlock.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part,pmask;
	unsigned int raidID;
	int rc;
	int error = 0;

	/* This whole next chunk of code is somewhat suspect... Not sure
	   it's needed here at all... XXX */

	if (rf_kbooted == RFK_BOOT_NONE) {
		printf("Doing restart on raidopen.\n");
		rf_kbooted = RFK_BOOT_GOOD;
		rc = rf_boot();
		if (rc) {
			rf_kbooted = RFK_BOOT_BAD;
			printf("Someone is unhappy...\n");
			return(rc);
		}
	}

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return(error);
	lp = rs->sc_dkdev.dk_label;

	raidID = raidunit(dev);

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		    raidID,part));


	/* First open of a configured unit: refresh the in-core label. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		/* Note: the RAIDF_INITED test short-circuits before lp is
		   dereferenced, so an unconfigured unit is safe here. */
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return(error);
		}
	}

	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	rs->sc_dkdev.dk_openmask =
		rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return(error);


}
475
476 /* ARGSUSED */
477 int
478 raidclose(dev, flags, fmt, p)
479 dev_t dev;
480 int flags, fmt;
481 struct proc *p;
482 {
483 int unit = raidunit(dev);
484 struct raid_softc *rs;
485 int error = 0;
486 int part;
487
488 if (unit >= numraid)
489 return (ENXIO);
490 rs = &raid_softc[unit];
491
492 if ((error = raidlock(rs)) != 0)
493 return (error);
494
495 part = DISKPART(dev);
496
497 /* ...that much closer to allowing unconfiguration... */
498 switch (fmt) {
499 case S_IFCHR:
500 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
501 break;
502
503 case S_IFBLK:
504 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
505 break;
506 }
507 rs->sc_dkdev.dk_openmask =
508 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
509
510 raidunlock(rs);
511 return (0);
512
513 }
514
515 void
516 raidstrategy(bp)
517 register struct buf *bp;
518 {
519 register int s;
520
521 unsigned int raidID = raidunit(bp->b_dev);
522 RF_Raid_t *raidPtr;
523 struct raid_softc *rs = &raid_softc[raidID];
524 struct disklabel *lp;
525 int wlabel;
526
527 #if 0
528 db1_printf(("Strategy: 0x%x 0x%x\n",bp,bp->b_data));
529 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int)bp->b_bufsize));
530 db1_printf(("bp->b_count=%d\n",(int)bp->b_bcount));
531 db1_printf(("bp->b_resid=%d\n",(int)bp->b_resid));
532 db1_printf(("bp->b_blkno=%d\n",(int)bp->b_blkno));
533
534 if (bp->b_flags&B_READ)
535 db1_printf(("READ\n"));
536 else
537 db1_printf(("WRITE\n"));
538 #endif
539 if (rf_kbooted != RFK_BOOT_GOOD)
540 return;
541 if (raidID >= numraid || !raidPtrs[raidID]) {
542 bp->b_error = ENODEV;
543 bp->b_flags |= B_ERROR;
544 bp->b_resid = bp->b_bcount;
545 biodone(bp);
546 return;
547 }
548 raidPtr = raidPtrs[raidID];
549 if (!raidPtr->valid) {
550 bp->b_error = ENODEV;
551 bp->b_flags |= B_ERROR;
552 bp->b_resid = bp->b_bcount;
553 biodone(bp);
554 return;
555 }
556 if (bp->b_bcount == 0) {
557 db1_printf(("b_bcount is zero..\n"));
558 biodone(bp);
559 return;
560 }
561 lp = rs->sc_dkdev.dk_label;
562
563 /*
564 * Do bounds checking and adjust transfer. If there's an
565 * error, the bounds check will flag that for us.
566 */
567
568 wlabel = rs->sc_flags & (RAIDF_WLABEL|RAIDF_LABELLING);
569 if (DISKPART(bp->b_dev) != RAW_PART)
570 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
571 db1_printf(("Bounds check failed!!:%d %d\n",
572 (int)bp->b_blkno,(int)wlabel));
573 biodone(bp);
574 return;
575 }
576
577 s = splbio(); /* XXX Needed? */
578 db1_printf(("Beginning strategy...\n"));
579
580 bp->b_resid = 0;
581 bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
582 NULL, NULL, NULL);
583 if (bp->b_error) {
584 bp->b_flags |= B_ERROR;
585 db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
586 bp->b_error));
587 }
588 splx(s);
589 #if 0
590 db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
591 bp,bp->b_data,
592 (int)bp->b_bcount,(int)bp->b_resid));
593 #endif
594 }
595
596 /* ARGSUSED */
597 int
598 raidread(dev, uio, flags)
599 dev_t dev;
600 struct uio *uio;
601 int flags;
602 {
603 int unit = raidunit(dev);
604 struct raid_softc *rs;
605 int result;
606 int part;
607
608 if (unit >= numraid)
609 return (ENXIO);
610 rs = &raid_softc[unit];
611
612 if ((rs->sc_flags & RAIDF_INITED) == 0)
613 return (ENXIO);
614 part = DISKPART(dev);
615
616 db1_printf(("raidread: unit: %d partition: %d\n",unit,part));
617
618 #if 0
619 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
620 #endif
621 result=physio(raidstrategy, NULL, dev, B_READ, minphys, uio);
622 db1_printf(("raidread done. Result is %d %d\n",
623 result,uio->uio_resid));
624 return(result);
625
626 }
627
628 /* ARGSUSED */
629 int
630 raidwrite(dev, uio, flags)
631 dev_t dev;
632 struct uio *uio;
633 int flags;
634 {
635 int unit = raidunit(dev);
636 struct raid_softc *rs;
637
638 if (unit >= numraid)
639 return (ENXIO);
640 rs = &raid_softc[unit];
641
642 if ((rs->sc_flags & RAIDF_INITED) == 0)
643 return (ENXIO);
644 db1_printf(("raidwrite\n"));
645 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
646
647
648 }
649
650 int
651 raidioctl(dev, cmd, data, flag, p)
652 dev_t dev;
653 u_long cmd;
654 caddr_t data;
655 int flag;
656 struct proc *p;
657 {
658 int unit = raidunit(dev);
659 int error = 0;
660 int part, pmask;
661 struct raid_softc *rs;
662 #if 0
663 int r,c;
664 #endif
665 /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
666
667 /* struct ccdbuf *cbp; */
668 /* struct raidbuf *raidbp; */
669 RF_Config_t *k_cfg, *u_cfg;
670 u_char *specific_buf;
671 int retcode = 0;
672
673 int row;
674 struct rf_recon_req *rrcopy, *rr;
675 #if 0
676 int nbytes, spl, rw, row;
677 struct rf_test_acc *ta;
678 struct buf *bp;
679 RF_SparetWait_t *waitreq;
680 struct rf_test_acc *ta_p, *ta_copy;
681 #endif
682
683 if (unit >= numraid)
684 return (ENXIO);
685 rs = &raid_softc[unit];
686
687 db1_printf(("raidioctl: %d %d %d %d\n",(int)dev,
688 (int)DISKPART(dev),(int)unit,(int)cmd));
689
690 /* Must be open for writes for these commands... */
691 switch (cmd) {
692 case DIOCSDINFO:
693 case DIOCWDINFO:
694 case DIOCWLABEL:
695 if ((flag & FWRITE) == 0)
696 return (EBADF);
697 }
698
699 /* Must be initialized for these... */
700 switch (cmd) {
701 case DIOCGDINFO:
702 case DIOCSDINFO:
703 case DIOCWDINFO:
704 case DIOCGPART:
705 case DIOCWLABEL:
706 case DIOCGDEFLABEL:
707 case RAIDFRAME_SHUTDOWN:
708 case RAIDFRAME_REWRITEPARITY:
709 case RAIDFRAME_GET_INFO:
710 case RAIDFRAME_RESET_ACCTOTALS:
711 case RAIDFRAME_GET_ACCTOTALS:
712 case RAIDFRAME_KEEP_ACCTOTALS:
713 case RAIDFRAME_GET_SIZE:
714 case RAIDFRAME_FAIL_DISK:
715 case RAIDFRAME_COPYBACK:
716 case RAIDFRAME_CHECKRECON:
717 if ((rs->sc_flags & RAIDF_INITED) == 0)
718 return (ENXIO);
719 }
720
721 switch (cmd) {
722
723
724 /* configure the system */
725 case RAIDFRAME_CONFIGURE:
726
727 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
728 /* copy-in the configuration information */
729 /* data points to a pointer to the configuration structure */
730 u_cfg = *((RF_Config_t **) data);
731 RF_Malloc(k_cfg,sizeof(RF_Config_t),(RF_Config_t *));
732 if (k_cfg == NULL) {
733 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
734 return(ENOMEM);
735 }
736 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
737 sizeof(RF_Config_t));
738 if (retcode) {
739 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
740 retcode));
741 return(retcode);
742 }
743
744 /* allocate a buffer for the layout-specific data,
745 and copy it in */
746 if (k_cfg->layoutSpecificSize) {
747 if (k_cfg->layoutSpecificSize > 10000) {
748 /* sanity check */
749 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
750 return(EINVAL);
751 }
752 RF_Malloc(specific_buf,k_cfg->layoutSpecificSize,
753 (u_char *));
754 if (specific_buf == NULL) {
755 RF_Free(k_cfg,sizeof(RF_Config_t));
756 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
757 return(ENOMEM);
758 }
759 retcode = copyin(k_cfg->layoutSpecific,
760 (caddr_t) specific_buf,
761 k_cfg->layoutSpecificSize);
762 if (retcode) {
763 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
764 retcode));
765 return(retcode);
766 }
767 } else specific_buf = NULL;
768 k_cfg->layoutSpecific = specific_buf;
769
770 /* should do some kind of sanity check on the configuration.
771 Store the sum of all the bytes in the last byte?
772 */
773
774 #if 0
775 db1_printf(("Considering configuring the system.:%d 0x%x\n",
776 unit,p));
777 #endif
778
779 /* We need the pointer to this a little deeper, so
780 stash it here... */
781
782 raidPtrs[unit]->proc = p;
783
784 /* configure the system */
785 rf_pending_testaccs = 0;
786
787
788 raidPtrs[unit]->raidid = unit;
789 retcode = rf_Configure(raidPtrs[unit], k_cfg);
790
791
792 if (retcode == 0) {
793 retcode = raidinit(dev, raidPtrs[unit],unit);
794 }
795
796 /* free the buffers. No return code here. */
797 if (k_cfg->layoutSpecificSize) {
798 RF_Free(specific_buf,k_cfg->layoutSpecificSize);
799 }
800 RF_Free(k_cfg,sizeof(RF_Config_t));
801
802 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
803 retcode));
804 return(retcode);
805
806 /* shutdown the system */
807 case RAIDFRAME_SHUTDOWN:
808
809 if ((error = raidlock(rs)) != 0)
810 return(error);
811
812 /*
813 * If somebody has a partition mounted, we shouldn't
814 * shutdown.
815 */
816
817 part = DISKPART(dev);
818 pmask = (1 << part);
819 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
820 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
821 (rs->sc_dkdev.dk_copenmask & pmask))) {
822 raidunlock(rs);
823 return (EBUSY);
824 }
825
826 /* the intention here was to disallow shutdowns while
827 raidframe is mounted, but it doesn't work because the
828 shutdown ioctl calls rf_open
829 */
830 if (rf_pending_testaccs > 0) {
831 printf("RAIDFRAME: Can't shutdown because there are %d pending test accs\n",
832 rf_pending_testaccs);
833 return(EINVAL);
834 }
835 if (rf_debugKernelAccess) {
836 printf("call shutdown\n");
837 }
838 raidPtrs[unit]->proc = p; /* XXX necessary evil */
839 retcode = rf_Shutdown(raidPtrs[unit]);
840
841 db1_printf(("Done main shutdown\n"));
842
843 pool_destroy(&rs->sc_cbufpool);
844 db1_printf(("Done freeing component buffer freelist\n"));
845
846 /* It's no longer initialized... */
847 rs->sc_flags &= ~RAIDF_INITED;
848
849 /* Detach the disk. */
850 disk_detach(&rs->sc_dkdev);
851
852 raidunlock(rs);
853
854 return(retcode);
855
856 /* initialize all parity */
857 case RAIDFRAME_REWRITEPARITY:
858
859 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0)
860 return(EINVAL);
861 /* borrow the thread of the requesting process */
862 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
863 retcode = rf_RewriteParity(raidPtrs[unit]);
864 /* return I/O Error if the parity rewrite fails */
865
866 if (retcode)
867 retcode = EIO;
868 return(retcode);
869
870 /* issue a test-unit-ready through raidframe to the
871 indicated device */
872 #if 0 /* XXX not supported yet (ever?) */
873 case RAIDFRAME_TUR:
874 /* debug only */
875 retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
876 return(retcode);
877 #endif
878 case RAIDFRAME_GET_INFO:
879 {
880 RF_Raid_t *raid = raidPtrs[unit];
881 RF_DeviceConfig_t *cfg, **ucfgp;
882 int i, j, d;
883
884 if (!raid->valid)
885 return(ENODEV);
886 ucfgp = (RF_DeviceConfig_t **)data;
887 RF_Malloc(cfg,sizeof(RF_DeviceConfig_t),
888 (RF_DeviceConfig_t *));
889 if (cfg == NULL)
890 return(ENOMEM);
891 bzero((char *)cfg, sizeof(RF_DeviceConfig_t));
892 cfg->rows = raid->numRow;
893 cfg->cols = raid->numCol;
894 cfg->ndevs = raid->numRow * raid->numCol;
895 if (cfg->ndevs >= RF_MAX_DISKS) {
896 cfg->ndevs = 0;
897 return(ENOMEM);
898 }
899 cfg->nspares = raid->numSpare;
900 if (cfg->nspares >= RF_MAX_DISKS) {
901 cfg->nspares = 0;
902 return(ENOMEM);
903 }
904 cfg->maxqdepth = raid->maxQueueDepth;
905 d = 0;
906 for(i=0;i<cfg->rows;i++) {
907 for(j=0;j<cfg->cols;j++) {
908 cfg->devs[d] = raid->Disks[i][j];
909 d++;
910 }
911 }
912 for(j=cfg->cols,i=0;i<cfg->nspares;i++,j++) {
913 cfg->spares[i] = raid->Disks[0][j];
914 }
915 retcode = copyout((caddr_t)cfg, (caddr_t)*ucfgp,
916 sizeof(RF_DeviceConfig_t));
917 RF_Free(cfg,sizeof(RF_DeviceConfig_t));
918
919 return(retcode);
920 }
921 break;
922
923 case RAIDFRAME_RESET_ACCTOTALS:
924 {
925 RF_Raid_t *raid = raidPtrs[unit];
926
927 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
928 return(0);
929 }
930 break;
931
932 case RAIDFRAME_GET_ACCTOTALS:
933 {
934 RF_AccTotals_t *totals = (RF_AccTotals_t *)data;
935 RF_Raid_t *raid = raidPtrs[unit];
936
937 *totals = raid->acc_totals;
938 return(0);
939 }
940 break;
941
942 case RAIDFRAME_KEEP_ACCTOTALS:
943 {
944 RF_Raid_t *raid = raidPtrs[unit];
945 int *keep = (int *)data;
946
947 raid->keep_acc_totals = *keep;
948 return(0);
949 }
950 break;
951
952 case RAIDFRAME_GET_SIZE:
953 *(int *) data = raidPtrs[unit]->totalSectors;
954 return(0);
955
956 #define RAIDFRAME_RECON 1
957 /* XXX The above should probably be set somewhere else!! GO */
958 #if RAIDFRAME_RECON > 0
959
960 /* fail a disk & optionally start reconstruction */
961 case RAIDFRAME_FAIL_DISK:
962 rr = (struct rf_recon_req *) data;
963
964 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
965 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
966 return(EINVAL);
967
968 printf("Failing the disk: row: %d col: %d\n",rr->row,rr->col);
969
970 /* make a copy of the recon request so that we don't
971 rely on the user's buffer */
972 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
973 bcopy(rr, rrcopy, sizeof(*rr));
974 rrcopy->raidPtr = (void *) raidPtrs[unit];
975
976 LOCK_RECON_Q_MUTEX();
977 rrcopy->next = recon_queue;
978 recon_queue = rrcopy;
979 wakeup(&recon_queue);
980 UNLOCK_RECON_Q_MUTEX();
981
982 return(0);
983
984 /* invoke a copyback operation after recon on whatever
985 disk needs it, if any */
986 case RAIDFRAME_COPYBACK:
987 /* borrow the current thread to get this done */
988 raidPtrs[unit]->proc = p; /* ICK.. but needed :-p GO */
989 rf_CopybackReconstructedData(raidPtrs[unit]);
990 return(0);
991
992 /* return the percentage completion of reconstruction */
993 case RAIDFRAME_CHECKRECON:
994 row = *(int *) data;
995 if (row < 0 || row >= raidPtrs[unit]->numRow)
996 return(EINVAL);
997 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
998 *(int *) data = 100;
999 else
1000 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1001 return(0);
1002
1003 /* the sparetable daemon calls this to wait for the
1004 kernel to need a spare table.
1005 * this ioctl does not return until a spare table is needed.
1006 * XXX -- calling mpsleep here in the ioctl code is almost
1007 certainly wrong and evil. -- XXX
1008 * XXX -- I should either compute the spare table in the
1009 kernel, or have a different -- XXX
1010 * XXX -- interface (a different character device) for
1011 delivering the table -- XXX
1012 */
1013 #if 0
1014 case RAIDFRAME_SPARET_WAIT:
1015 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1016 while (!rf_sparet_wait_queue) mpsleep(&rf_sparet_wait_queue, (PZERO+1)|PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1017 waitreq = rf_sparet_wait_queue;
1018 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1019 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1020
1021 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1022
1023 RF_Free(waitreq, sizeof(*waitreq));
1024 return(0);
1025
1026
1027 /* wakes up a process waiting on SPARET_WAIT and puts an
1028 error code in it that will cause the dameon to exit */
1029 case RAIDFRAME_ABORT_SPARET_WAIT:
1030 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1031 waitreq->fcol = -1;
1032 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1033 waitreq->next = rf_sparet_wait_queue;
1034 rf_sparet_wait_queue = waitreq;
1035 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1036 wakeup(&rf_sparet_wait_queue);
1037 return(0);
1038
1039 /* used by the spare table daemon to deliver a spare table
1040 into the kernel */
1041 case RAIDFRAME_SEND_SPARET:
1042
1043 /* install the spare table */
1044 retcode = rf_SetSpareTable(raidPtrs[unit],*(void **) data);
1045
1046 /* respond to the requestor. the return status of the
1047 spare table installation is passed in the "fcol" field */
1048 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1049 waitreq->fcol = retcode;
1050 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1051 waitreq->next = rf_sparet_resp_queue;
1052 rf_sparet_resp_queue = waitreq;
1053 wakeup(&rf_sparet_resp_queue);
1054 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1055
1056 return(retcode);
1057 #endif
1058
1059
1060 #endif /* RAIDFRAME_RECON > 0 */
1061
1062 default: break; /* fall through to the os-specific code below */
1063
1064 }
1065
1066 if (!raidPtrs[unit]->valid)
1067 return(EINVAL);
1068
1069 /*
1070 * Add support for "regular" device ioctls here.
1071 */
1072
1073 switch (cmd) {
1074 case DIOCGDINFO:
1075 db1_printf(("DIOCGDINFO %d %d\n",(int)dev,(int)DISKPART(dev)));
1076 *(struct disklabel *)data = *(rs->sc_dkdev.dk_label);
1077 break;
1078
1079 case DIOCGPART:
1080 db1_printf(("DIOCGPART: %d %d\n",(int)dev,(int)DISKPART(dev)));
1081 ((struct partinfo *)data)->disklab = rs->sc_dkdev.dk_label;
1082 ((struct partinfo *)data)->part =
1083 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1084 break;
1085
1086 case DIOCWDINFO:
1087 db1_printf(("DIOCWDINFO\n"));
1088 case DIOCSDINFO:
1089 db1_printf(("DIOCSDINFO\n"));
1090 if ((error = raidlock(rs)) != 0)
1091 return (error);
1092
1093 rs->sc_flags |= RAIDF_LABELLING;
1094
1095 error = setdisklabel(rs->sc_dkdev.dk_label,
1096 (struct disklabel *)data, 0, rs->sc_dkdev.dk_cpulabel);
1097 if (error == 0) {
1098 if (cmd == DIOCWDINFO)
1099 error = writedisklabel(RAIDLABELDEV(dev),
1100 raidstrategy, rs->sc_dkdev.dk_label,
1101 rs->sc_dkdev.dk_cpulabel);
1102 }
1103
1104 rs->sc_flags &= ~RAIDF_LABELLING;
1105
1106 raidunlock(rs);
1107
1108 if (error)
1109 return (error);
1110 break;
1111
1112 case DIOCWLABEL:
1113 db1_printf(("DIOCWLABEL\n"));
1114 if (*(int *)data != 0)
1115 rs->sc_flags |= RAIDF_WLABEL;
1116 else
1117 rs->sc_flags &= ~RAIDF_WLABEL;
1118 break;
1119
1120 case DIOCGDEFLABEL:
1121 db1_printf(("DIOCGDEFLABEL\n"));
1122 raidgetdefaultlabel(raidPtrs[unit], rs,
1123 (struct disklabel *)data);
1124 break;
1125
1126 default:
1127 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1128 }
1129 return(retcode);
1130
1131 }
1132
1133
1134 /* raidinit -- complete the rest of the initialization for the
1135 RAIDframe device. */
1136
1137
1138 static int
1139 raidinit(dev, raidPtr,unit)
1140 dev_t dev;
1141 RF_Raid_t *raidPtr;
1142 int unit;
1143 {
1144 int retcode;
1145 /* int ix; */
1146 /* struct raidbuf *raidbp; */
1147 struct raid_softc *rs;
1148
1149 retcode = 0;
1150
1151 rs = &raid_softc[unit];
1152 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1153 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1154
1155
1156 /* XXX should check return code first... */
1157 rs->sc_flags |= RAIDF_INITED;
1158
1159 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds.*/
1160
1161 rs->sc_dkdev.dk_name = rs->sc_xname;
1162 /* disk_attach actually creates space for the CPU disklabel, among
1163 other things, so it's critical to call this *BEFORE* we
1164 try putzing with disklabels. */
1165 disk_attach(&rs->sc_dkdev);
1166
1167 /* XXX There may be a weird interaction here between this, and
1168 protectedSectors, as used in RAIDframe. */
1169 rs->sc_size = raidPtr->totalSectors;
1170 rs->sc_dev = dev;
1171 return(retcode);
1172 }
1173
1174
1175 /*********************************************************
1176 *
1177 * initialization code called at boot time (startup.c)
1178 *
1179 ********************************************************/
1180 int rf_boot()
1181 {
1182 int i, rc;
1183
1184 rc = rf_mutex_init(&rf_sparet_wait_mutex);
1185 if (rc) {
1186 RF_PANIC();
1187 }
1188 rc = rf_mutex_init(&rf_async_done_q_mutex);
1189 if (rc) {
1190 RF_PANIC();
1191 }
1192 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
1193 recon_queue = NULL;
1194 rf_async_done_qh = rf_async_done_qt = NULL;
1195 for (i=0; i<numraid; i++)
1196 raidPtrs[i] = NULL;
1197 rc = rf_BootRaidframe();
1198 if (rc == 0)
1199 printf("Kernelized RAIDframe activated\n");
1200 else
1201 rf_kbooted = RFK_BOOT_BAD;
1202 return(rc);
1203 }
1204
/*
 * This kernel thread never exits.  It is created once, and persists
 * until the system reboots.
 *
 * It drains the global recon_queue: each queued rf_recon_req names a
 * component (row/col) to fail and, optionally, to reconstruct.
 */
void rf_ReconKernelThread()
{
	struct rf_recon_req *req;	/* request currently being serviced */
	int s;	/* saved spl; never restored -- the loop below never exits */

	/* XXX not sure what spl() level we should be at here... probably splbio() */
	s=splbio();

	while (1) {
		/* grab the next reconstruction request from the queue */
		LOCK_RECON_Q_MUTEX();
		/* while empty, drop the queue lock across the sleep so
		   the enqueuer can take it, then re-acquire and re-test */
		while (!recon_queue) {
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO | PCATCH, "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		/* pop the head of the singly-linked request list */
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
		 * If flags specifies that we should start recon, this call
		 * will not return until reconstruction completes, fails, or is aborted.
		 */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags&RF_FDFLAGS_RECON) ? 1 : 0));

		/* the enqueuer allocated the request; we own and free it */
		RF_Free(req, sizeof(*req));
	}
}
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * Blocks until the user-level daemon posts a response on
 * rf_sparet_resp_queue; returns the status the daemon placed in the
 * response's "fcol" field.
 */
int rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	/* hand the request to the daemon via the wait queue and wake it */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): the disabled mpsleep() variant below would drop
	   the mutex across the sleep; tsleep() does not, so the mutex is
	   nominally held while sleeping here.  Presumably RF_LOCK_MUTEX
	   is benign (or a no-op) in this kernel build -- confirm. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO | PCATCH,
		    "raidframe getsparetable", 0);
#if 0
		mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
#endif
	}
	/* pop the daemon's answer off the response queue */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* the installation status travels back in the "fcol" field */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we alloc'd */
	return(retcode);
}
1272
/* a wrapper around rf_DoAccess that extracts appropriate info from the bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * On parameter errors the buf is completed here (biodone) with
 * ENOSPC/EINVAL; otherwise the access is handed to rf_DoAccess.
 * Reads run async; writes run sync and sleep on bp until completion.
 */
int rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
	RF_Raid_t *raidPtr;
	struct buf *bp;
	RF_RaidAccessFlags_t flags;
	void (*cbFunc)(struct buf *);
	void *cbArg;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;

	/* XXX The dev_t used here should be for /dev/[r]raid* !!! */

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Ok, for the bp we have here, bp->b_blkno is relative to the
	   partition.. Need to make it absolute to the underlying
	   device.. */

	blocknum = bp->b_blkno;
	if (DISKPART(bp->b_dev) != RAW_PART) {
		/* add the partition's offset within the unit */
		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
		blocknum += pp->p_offset;
		db1_printf(("updated: %d %d\n",DISKPART(bp->b_dev),
		    pp->p_offset));
	} else {
		db1_printf(("Is raw..\n"));
	}
	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n",(int)bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n",(int)bp->b_resid));

	/* *THIS* is where we adjust what block we're going to... but
	   DO NOT TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/* whole sectors in the request, plus one ("pb") if a partial
	   sector hangs off the end */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount&raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
		    (int)raid_addr, (int)sum,(int)num_blocks,
		    (int)pb,(int)bp->b_resid));
	}

	/* bounds check; the "sum < x" comparisons also catch wraparound
	   in the unsigned addition above */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb))
	{
		bp->b_error = ENOSPC;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return(bp->b_error);
	}

	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* reject transfers that aren't a multiple of the sector size */
	if (bp->b_bcount & raidPtr->sectorMask) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return(bp->b_error);
	}
	db1_printf(("Calling DoAccess..\n"));

	/*
	 * XXX For now, all writes are sync
	 */
	do_async = 1;
	if ((bp->b_flags & B_READ) == 0)
		do_async = 0;

	/* don't ever condition on bp->b_flags & B_WRITE.
	   always condition on B_READ instead */
	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
	    do_async, raid_addr, num_blocks,
	    bp->b_un.b_addr,
	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO|flags,
	    NULL, cbFunc, cbArg);
#if 0
	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n",bp,
	    bp->b_data,(int)bp->b_resid));
#endif

	/*
	 * If we requested sync I/O, sleep here.
	 * NOTE(review): nothing in this function issues the wakeup(bp);
	 * presumably cbFunc (or code it triggers) does -- confirm.
	 */
	if ((retcode == 0) && (do_async == 0))
		tsleep(bp, PRIBIO, "raidsyncio", 0);

	return(retcode);
}
1382
/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry
 *
 * Builds a shadow buf (struct raidbuf) aimed at the component device,
 * bumps the queue's outstanding count, and hands the buf to the
 * component driver via VOP_STRATEGY.  NOPs complete immediately through
 * KernelWakeupFunc.  Always returns 0.
 */
int rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp=NULL;
	struct raid_softc *rs;
	int unit;

	/* XXX along with the vnode, we also need the softc associated with
	   this device.. */

	/* remember which queue this request belongs to, for completion */
	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n",unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n",unit,numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}

	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;

	/*
	   XXX when there is a physical disk failure, someone is passing
	   us a buffer that contains old stuff!!  Attempt to deal with
	   this problem without taking a performance hit...
	   (not sure where the real bug is.  It's buried in RAIDframe
	   somewhere) :-(  GO )
	 */

	/* scrub any stale error state left in the recycled buffer */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error!=0) {
		bp->b_error = 0;
	}

	/* get a shadow buf + completion context from the per-unit pool */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/*
		  Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		  queue->row, queue->col);
		 */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it
		   used, and I'd like folks to report it... GO */
		/* NOTE(review): the doubled parens make this an ordinary
		   printf of the string; it reads like it was once a
		   db1_printf((...)) -- confirm before changing. */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* no physical I/O: complete the shadow buf right away */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			/* start timing the disk wait for tracing */
			RF_ETIMER_START(req->tracerec->timer);
		}

		/* aim the shadow buf at the component's vnode/device;
		   KernelWakeupFunc will run at completion */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
			    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any
		   pending reqs at any other priority */
		queue->curPriority = req->priority;
		/*
		  Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		  req->type, queue->row, queue->col);
		 */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
		    req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
		    (int) req->sectorOffset, (int) req->numSector,
		    (int) (req->numSector <<
		    queue->raidPtr->logBytesPerSector),
		    (int) queue->raidPtr->logBytesPerSector));
		/* writes must account against the component vnode */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}

		/* hand the shadow buf to the component's driver */
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return(0);
}
1507
/* this is the callback function associated with a I/O invoked from
   kernel code.
 *
 * Runs when a component I/O dispatched by rf_DispatchKernelIO
 * completes: propagates errors into the original buf, optionally
 * marks the component failed, returns the shadow raidbuf to its pool,
 * and notifies RAIDframe via rf_DiskIOComplete/CompleteFunc.
 */
static void KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* vbp is really the rf_buf embedded (first) in a struct raidbuf;
	   cast back to recover the completion context */
	struct raidbuf *raidbp = (struct raidbuf *)vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s=splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	/* the caller's original buf this shadow I/O was issued for */
	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n",bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate any component-level error into the original buf;
	   default to EIO if the driver didn't set an error code */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n",raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}

#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n",(int)raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n",(int)raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n",(int)raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n",raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	if (req->tracerec) {
		/* charge the elapsed time to the trace record */
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}

	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go ballistic,
	   and mark the component as hosed... */
#if 1
	if (bp->b_flags&B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname );
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* done with the shadow buf; give it back to the per-unit pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs,raidbp);


	if (bp->b_resid==0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
		    unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n",bp->b_resid));
	}

	/* tell RAIDframe this component I/O finished (2nd arg: 1 = error) */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc)(req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
1609
1610
1611
/*
 * initialize a buf structure for doing an I/O in the kernel.
 *
 *  bp     - caller-allocated buf to fill in
 *  b_vp   - vnode of the component device the I/O targets
 *  rw_flag- B_READ or B_WRITE, possibly or'd with other b_flags bits
 *  cbFunc - installed as b_iodone; B_CALL is set so it runs at completion
 *  cbArg  - currently unused by this function
 *
 * Panics if the computed byte count is zero.
 */
static void InitBP(
	struct buf *bp,
	struct vnode *b_vp,
	unsigned rw_flag,
	dev_t dev,
	RF_SectorNum_t startSect,
	RF_SectorCount_t numSect,
	caddr_t buf,
	void (*cbFunc)(struct buf *),
	void *cbArg,
	int logBytesPerSector,
	struct proc *b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
	/* sectors -> bytes */
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	db1_printf(("bp->b_dev is %d\n", dev));
	bp->b_un.b_addr = buf;
#if 0
	db1_printf(("bp->b_data=0x%x\n",bp->b_data));
#endif

	bp->b_blkno = startSect;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	db1_printf(("b_bcount is: %d\n",(int)bp->b_bcount));
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!\n");
	}
	bp->b_proc = b_proc;
	/* completion callback; fires via biodone() because of B_CALL */
	bp->b_iodone = cbFunc;
	bp->b_vp = b_vp;

}
1651
1652 /* Extras... */
1653
unsigned int rpcc()
{
	/* XXX stub: presumably this was meant to read the CPU cycle
	   counter (cf. the Alpha "rpcc" instruction it is named after);
	   for now it always reports zero ticks. */
	return(0);
}
1661
1662 #if 0
1663 int rf_GetSpareTableFromDaemon(req)
1664 RF_SparetWait_t *req;
1665 {
1666 int retcode=1;
1667 printf("This is supposed to do something useful!!\n"); /* XXX */
1668
1669 return(retcode);
1670
1671 }
1672 #endif
1673
1674 static void
1675 raidgetdefaultlabel(raidPtr, rs, lp)
1676 RF_Raid_t *raidPtr;
1677 struct raid_softc *rs;
1678 struct disklabel *lp;
1679 {
1680 db1_printf(("Building a default label...\n"));
1681 bzero(lp, sizeof(*lp));
1682
1683 /* fabricate a label... */
1684 lp->d_secperunit = raidPtr->totalSectors;
1685 lp->d_secsize = raidPtr->bytesPerSector;
1686 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1687 lp->d_ntracks = 1;
1688 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1689 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1690
1691 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1692 lp->d_type = DTYPE_RAID;
1693 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1694 lp->d_rpm = 3600;
1695 lp->d_interleave = 1;
1696 lp->d_flags = 0;
1697
1698 lp->d_partitions[RAW_PART].p_offset = 0;
1699 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1700 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1701 lp->d_npartitions = RAW_PART + 1;
1702
1703 lp->d_magic = DISKMAGIC;
1704 lp->d_magic2 = DISKMAGIC;
1705 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1706
1707 }
1708
/*
 * Read the disklabel from the raid device. If one is not present, fake one
 * up.
 */
static void
raidgetdisklabel(dev)
	dev_t dev;
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a sane fabricated label... */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* ...no usable on-disk label: install the default one */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * the same components are used, and the old disklabel may be
		 * used if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%d)\n", rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%d)\n",
				    rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
1766
1767 /*
1768 * Take care of things one might want to take care of in the event
1769 * that a disklabel isn't present.
1770 */
1771 static void
1772 raidmakedisklabel(rs)
1773 struct raid_softc *rs;
1774 {
1775 struct disklabel *lp = rs->sc_dkdev.dk_label;
1776 db1_printf(("Making a label..\n"));
1777
1778 /*
1779 * For historical reasons, if there's no disklabel present
1780 * the raw partition must be marked FS_BSDFFS.
1781 */
1782
1783 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1784
1785 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1786
1787 lp->d_checksum = dkcksum(lp);
1788 }
1789
/*
 * Lookup the provided name in the filesystem.  If the file exists,
 * is a valid block device, and isn't being used by anyone else,
 * set *vpp to the file's vnode.
 * You'll find the original of this in ccd.c
 *
 * Returns 0 on success (vnode left open/referenced but unlocked),
 * or an errno; every failure path unlocks and closes the vnode.
 */
int
raidlookup(path, p, vpp)
	char *path;
	struct proc *p;
	struct vnode **vpp;	/* result */
{
	struct nameidata nd;
	struct vnode *vp;
	struct vattr va;
	int error;

	/* open the path read/write; on success the vnode comes back
	   locked, so each exit below must VOP_UNLOCK it */
	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
	if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
#ifdef DEBUG
		printf("RAIDframe: vn_open returned %d\n",error);
#endif
		return (error);
	}
	vp = nd.ni_vp;
	/* refuse devices that someone else already has open */
	if (vp->v_usecount > 1) {
		VOP_UNLOCK(vp, 0);
		(void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
		return (EBUSY);
	}
	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
		VOP_UNLOCK(vp, 0);
		(void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
		return (error);
	}
	/* XXX: eventually we should handle VREG, too. */
	if (va.va_type != VBLK) {
		VOP_UNLOCK(vp, 0);
		(void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
		return (ENOTBLK);
	}
	/* success: hand back the vnode unlocked but still open */
	VOP_UNLOCK(vp, 0);
	*vpp = vp;
	return (0);
}
1835
1836 /*
1837 * Wait interruptibly for an exclusive lock.
1838 *
1839 * XXX
1840 * Several drivers do this; it should be abstracted and made MP-safe.
1841 * (Hmm... where have we seen this warning before :-> GO )
1842 */
1843 static int
1844 raidlock(rs)
1845 struct raid_softc *rs;
1846 {
1847 int error;
1848
1849 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1850 rs->sc_flags |= RAIDF_WANTED;
1851 if ((error =
1852 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1853 return (error);
1854 }
1855 rs->sc_flags |= RAIDF_LOCKED;
1856 return (0);
1857 }
1858
1859 /*
1860 * Unlock and wake up any waiters.
1861 */
1862 static void
1863 raidunlock(rs)
1864 struct raid_softc *rs;
1865 {
1866
1867 rs->sc_flags &= ~RAIDF_LOCKED;
1868 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
1869 rs->sc_flags &= ~RAIDF_WANTED;
1870 wakeup(rs);
1871 }
1872 }
1873