Home | History | Annotate | Line # | Download | only in raidframe
rf_driver.c revision 1.2
      1 /*	$NetBSD: rf_driver.c,v 1.2 1998/11/13 13:45:15 drochner Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: Mark Holland, Khalil Amiri, Claudson Bornstein, William V. Courtright II,
      7  *         Robby Findler, Daniel Stodolsky, Rachad Youssef, Jim Zelenka
      8  *
      9  * Permission to use, copy, modify and distribute this software and
     10  * its documentation is hereby granted, provided that both the copyright
     11  * notice and this permission notice appear in all copies of the
     12  * software, derivative works or modified versions, and any portions
     13  * thereof, and that both notices appear in supporting documentation.
     14  *
     15  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     16  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     17  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     18  *
     19  * Carnegie Mellon requests users of this software to return to
     20  *
     21  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     22  *  School of Computer Science
     23  *  Carnegie Mellon University
     24  *  Pittsburgh PA 15213-3890
     25  *
     26  * any improvements or extensions that they make and grant Carnegie the
     27  * rights to redistribute these changes.
     28  */
     29 
     30 /******************************************************************************
     31  *
     32  * rf_driver.c -- main setup, teardown, and access routines for the RAID driver
     33  *
     34  * all routines are prefixed with rf_ (raidframe), to avoid conflicts.
     35  *
     36  ******************************************************************************/
     37 
     38 /*
     39  * :
     40  * Log: rf_driver.c,v
     41  * Revision 1.147  1996/08/21 04:12:46  jimz
     42  * added hook for starting out req_hist w/ more distributed values
     43  * (currently not done)
     44  *
     45  * Revision 1.146  1996/07/29  14:05:12  jimz
     46  * fix numPUs/numRUs confusion (everything is now numRUs)
     47  * clean up some commenting, return values
     48  *
     49  * Revision 1.145  1996/07/28  20:31:39  jimz
     50  * i386netbsd port
     51  * true/false fixup
     52  *
     53  * Revision 1.144  1996/07/27  18:40:24  jimz
     54  * cleanup sweep
     55  *
     56  * Revision 1.143  1996/07/22  21:11:53  jimz
     57  * fix formatting on DoAccess error msg
     58  *
     59  * Revision 1.142  1996/07/19  16:10:06  jimz
     60  * added call to rf_ResetDebugOptions() in rf_ConfigureDebug()
     61  *
     62  * Revision 1.141  1996/07/18  22:57:14  jimz
     63  * port simulator to AIX
     64  *
     65  * Revision 1.140  1996/07/17  21:00:58  jimz
     66  * clean up timer interface, tracing
     67  *
     68  * Revision 1.139  1996/07/15  05:40:41  jimz
     69  * some recon datastructure cleanup
     70  * better handling of multiple failures
     71  * added undocumented double-recon test
     72  *
     73  * Revision 1.138  1996/07/11  19:08:00  jimz
     74  * generalize reconstruction mechanism
     75  * allow raid1 reconstructs via copyback (done with array
     76  * quiesced, not online, therefore not disk-directed)
     77  *
     78  * Revision 1.137  1996/07/10  22:28:00  jimz
     79  * get rid of obsolete row statuses (dead,degraded2)
     80  *
     81  * Revision 1.136  1996/06/17  14:38:33  jimz
     82  * properly #if out RF_DEMO code
     83  * fix bug in MakeConfig that was causing weird behavior
     84  * in configuration routines (config was not zeroed at start)
     85  * clean up genplot handling of stacks
     86  *
     87  * Revision 1.135  1996/06/17  03:20:32  jimz
     88  * move out raidframe_attr_default
     89  * don't monkey with stack sizes
     90  *
     91  * Revision 1.134  1996/06/14  23:15:38  jimz
     92  * attempt to deal with thread GC problem
     93  *
     94  * Revision 1.133  1996/06/14  21:24:08  jimz
     95  * new ConfigureEtimer init
     96  * moved out timer vars
     97  *
     98  * Revision 1.132  1996/06/14  16:19:03  jimz
     99  * remove include of pdllib.h (beginning of PDL cleanup)
    100  *
    101  * Revision 1.131  1996/06/14  14:35:24  jimz
    102  * clean up dfstrace protection
    103  *
    104  * Revision 1.130  1996/06/14  14:16:09  jimz
    105  * engine config is now array-specific
    106  *
    107  * Revision 1.129  1996/06/13  19:08:10  jimz
    108  * add debug var to force keep_acc_totals on
    109  *
    110  * Revision 1.128  1996/06/11  10:57:08  jimz
    111  * init recon_done_proc_mutex
    112  *
    113  * Revision 1.127  1996/06/10  14:18:58  jimz
    114  * move user, throughput stats into per-array structure
    115  *
    116  * Revision 1.126  1996/06/10  11:55:47  jimz
    117  * Straightened out some per-array/not-per-array distinctions, fixed
    118  * a couple bugs related to confusion. Added shutdown lists. Removed
    119  * layout shutdown function (now subsumed by shutdown lists).
    120  *
    121  * Revision 1.125  1996/06/09  02:36:46  jimz
    122  * lots of little crufty cleanup- fixup whitespace
    123  * issues, comment #ifdefs, improve typing in some
    124  * places (esp size-related)
    125  *
    126  * Revision 1.124  1996/06/07  21:33:04  jimz
    127  * begin using consistent types for sector numbers,
    128  * stripe numbers, row+col numbers, recon unit numbers
    129  *
    130  * Revision 1.123  1996/06/05  19:38:32  jimz
    131  * fixed up disk queueing types config
    132  * added sstf disk queueing
    133  * fixed exit bug on diskthreads (ref-ing bad mem)
    134  *
    135  * Revision 1.122  1996/06/05  18:06:02  jimz
    136  * Major code cleanup. The Great Renaming is now done.
    137  * Better modularity. Better typing. Fixed a bunch of
    138  * synchronization bugs. Made a lot of global stuff
    139  * per-desc or per-array. Removed dead code.
    140  *
    141  * Revision 1.121  1996/06/03  23:28:26  jimz
    142  * more bugfixes
    143  * check in tree to sync for IPDS runs with current bugfixes
    144  * there still may be a problem with threads in the script test
    145  * getting I/Os stuck- not trivially reproducible (runs ~50 times
    146  * in a row without getting stuck)
    147  *
    148  * Revision 1.120  1996/06/02  17:31:48  jimz
    149  * Moved a lot of global stuff into array structure, where it belongs.
    150  * Fixed up paritylogging, pss modules in this manner. Some general
    151  * code cleanup. Removed lots of dead code, some dead files.
    152  *
    153  * Revision 1.119  1996/05/31  22:26:54  jimz
    154  * fix a lot of mapping problems, memory allocation problems
    155  * found some weird lock issues, fixed 'em
    156  * more code cleanup
    157  *
    158  * Revision 1.118  1996/05/30  23:22:16  jimz
    159  * bugfixes of serialization, timing problems
    160  * more cleanup
    161  *
    162  * Revision 1.117  1996/05/30  16:28:33  jimz
    163  * typo in rf_SignalQuiescenceLock() fixed
    164  *
    165  * Revision 1.116  1996/05/30  12:59:18  jimz
    166  * make etimer happier, more portable
    167  *
    168  * Revision 1.115  1996/05/30  11:29:41  jimz
    169  * Numerous bug fixes. Stripe lock release code disagreed with the taking code
    170  * about when stripes should be locked (I made it consistent: no parity, no lock)
    171  * There was a lot of extra serialization of I/Os which I've removed- a lot of
    172  * it was to calculate values for the cache code, which is no longer with us.
    173  * More types, function, macro cleanup. Added code to properly quiesce the array
    174  * on shutdown. Made a lot of stuff array-specific which was (bogusly) general
    175  * before. Fixed memory allocation, freeing bugs.
    176  *
    177  * Revision 1.114  1996/05/27  18:56:37  jimz
    178  * more code cleanup
    179  * better typing
    180  * compiles in all 3 environments
    181  *
    182  * Revision 1.113  1996/05/24  22:17:04  jimz
    183  * continue code + namespace cleanup
    184  * typed a bunch of flags
    185  *
    186  * Revision 1.112  1996/05/24  04:28:55  jimz
    187  * release cleanup ckpt
    188  *
    189  * Revision 1.111  1996/05/24  01:59:45  jimz
    190  * another checkpoint in code cleanup for release
    191  * time to sync kernel tree
    192  *
    193  * Revision 1.110  1996/05/23  21:46:35  jimz
    194  * checkpoint in code cleanup (release prep)
    195  * lots of types, function names have been fixed
    196  *
    197  * Revision 1.109  1996/05/23  00:39:56  jimz
    198  * demoMode -> rf_demoMode
    199  *
    200  * Revision 1.108  1996/05/23  00:33:23  jimz
    201  * code cleanup: move all debug decls to rf_options.c, all extern
    202  * debug decls to rf_options.h, all debug vars preceded by rf_
    203  *
    204  * Revision 1.107  1996/05/21  14:30:04  jimz
    205  * idler_desc_mutex should be ifndef SIMULATE
    206  *
    207  * Revision 1.106  1996/05/20  19:31:12  jimz
    208  * add atomic debug (mutex and cond leak finder) stuff
    209  *
    210  * Revision 1.105  1996/05/20  16:12:45  jimz
    211  * switch to rf_{mutex,cond}_{init,destroy}
    212  *
    213  * Revision 1.104  1996/05/18  20:09:41  jimz
    214  * bit of cleanup to compile cleanly in kernel, once again
    215  *
    216  * Revision 1.103  1996/05/18  19:51:34  jimz
    217  * major code cleanup- fix syntax, make some types consistent,
    218  * add prototypes, clean out dead code, et cetera
    219  *
    220  * Revision 1.102  1996/05/16  21:20:51  jimz
    221  * use FREELIST stuff to manage access descriptors
    222  *
    223  * Revision 1.101  1996/05/16  14:21:10  jimz
    224  * remove bogus copies from write path on user
    225  *
    226  * Revision 1.100  1996/05/15  22:33:54  jimz
    227  * appropriately #ifdef cache stuff
    228  *
    229  * Revision 1.99  1996/05/08  21:34:41  jimz
    230  * #if 0 ShutdownCache() and ConfigureCache()
    231  *
    232  * Revision 1.98  1996/05/08  21:01:24  jimz
    233  * fixed up enum type names that were conflicting with other
    234  * enums and function names (ie, "panic")
    235  * future naming trends will be towards RF_ and rf_ for
    236  * everything raidframe-related
    237  *
    238  * Revision 1.97  1996/05/07  19:02:58  wvcii
    239  * corrected header comment of rf_DoAccess()
    240  * reordered free of desc in FreeRaidAccDesc()  The desc is now
    241  * freed last.
    242  *
    243  * Revision 1.96  1996/05/07  17:40:50  jimz
    244  * add doDebug
    245  *
    246  * Revision 1.95  1996/05/06  21:35:23  jimz
    247  * fixed ordering of cleanup and removed extra decrement of configureCount
    248  *
    249  * Revision 1.94  1996/05/06  18:44:14  jimz
    250  * reorder cleanup to not blow alloclist out from under various modules
    251  * zero raidPtr contents on config
    252  *
    253  * Revision 1.93  1996/05/04  17:06:53  jimz
    254  * Fail the I/O with ENOSPC if reading past end of the array in the kernel.
    255  *
    256  * Revision 1.92  1996/05/03  19:44:22  wvcii
    257  * debug vars degDagDebug and enableAtomicRMW now defined
    258  * in this file.
    259  *
    260  * Revision 1.91  1995/12/12  18:10:06  jimz
    261  * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
    262  * fix 80-column brain damage in comments
    263  *
    264  * Revision 1.90  1995/12/08  15:07:03  arw
    265  * cache code cleanup
    266  *
    267  * Revision 1.89  1995/12/06  20:53:58  wvcii
    268  * created debug var forceParityLogReint
    269  * this variable forces reintegration of all parity logs at shutdown
    270  *
    271  * Revision 1.88  1995/12/01  15:59:10  root
    272  * added copyright info
    273  *
    274  * Revision 1.87  1995/11/28  21:34:02  amiri
    275  * modified SetReconfiguredMode so that it installs the
    276  * spare table only if arch is declustered based on block designs
    277  *
    278  * Revision 1.86  1995/11/21  23:06:11  amiri
    279  * added division by zero check in printing
    280  * throughput stats.
    281  *
    282  * Revision 1.85  1995/11/19  16:27:25  wvcii
    283  * disableParityVerify now defined locally, only read from config
    284  * file for !KERNEL compiles
    285  *
    286  * Revision 1.84  1995/11/17  15:08:31  wvcii
    287  * added debug var disableParityVerify
    288  * used in RealLoopTest to disable parity verification
    289  *
    290  * Revision 1.83  1995/11/07  15:48:43  wvcii
    291  * deleted debug vars: suppressAtomicRMW, enableRollAway, concatDagDebug
    292  * deleted debug vars: debugSelectUnit, debugSelectBlock
    293  * added debug var: enableAtomicRMW
    294  *
    295  * Revision 1.82  1995/10/18  19:28:45  amiri
    296  * added support for reconstruction demos in the
    297  * simulator, by updating some simulator
    298  * variables in Faildisk.
    299  *
    300  * Revision 1.81  1995/10/09  18:36:33  jimz
    301  * move rf_StopThroughputStats() into FreeAccDesc()
    302  * changed throughput output print format
    303  * added user-level copy to write path to emulate kernel hack
    304  *
    305  * Revision 1.80  1995/10/09  18:07:47  wvcii
    306  * moved call to rf_StopThroughputStats to rf_states.c
    307  *
    308  * Revision 1.79  1995/10/09  17:38:53  jimz
    309  * quiesce an array for user-level testing before shutting it down
    310  * (should this also be done in the kernel?)
    311  *
    312  * Revision 1.78  1995/10/09  15:35:43  wvcii
    313  * added code to measure throughput in user mode
    314  *
    315  * Revision 1.77  1995/10/05  06:18:59  jimz
    316  * Changed DDEventRequest() to take additional arg, used by simulator
    317  * to cache diskid so queue length can be decremented on io complete
    318  * (this is a hack to get around the fact that the event mechanism
    319  * assumes it can dereference arbitrary handles on enqueued events)
    320  *
    321  * Revision 1.76  1995/10/04  07:25:10  jimz
    322  * turn off bigstacks by default
    323  *
    324  * Revision 1.75  1995/10/04  07:24:34  jimz
    325  * code for bigstacks in user process
    326  *
    327  * Revision 1.74  1995/09/26  21:42:51  wvcii
    328  * removed calls to ConfigureCache, ShutdownCache when building kernel
    329  * kernel currently does not support any cached architectures
    330  *
    331  * Revision 1.73  1995/09/20  21:05:35  jimz
    332  * add missing unit arg to IO_BUF_ERR() in non-kernel case
    333  *
    334  * Revision 1.72  1995/09/19  23:02:44  jimz
    335  * call RF_DKU_END_IO in the appropriate places
    336  *
    337  * Revision 1.71  1995/09/07  19:02:31  jimz
    338  * mods to get raidframe to compile and link
    339  * in kernel environment
    340  *
    341  * Revision 1.70  1995/09/06  19:24:01  wvcii
    342  * added debug vars enableRollAway and debugRecovery
    343  *
    344  * Revision 1.69  1995/08/24  19:25:36  rachad
    345  * Fixes to LSS GC in the simulater
    346  *
    347  * Revision 1.68  1995/07/28  21:43:42  robby
    348  * checkin after leaving for Rice. Bye
    349  *
    350  * Revision 1.67  1995/07/26  18:06:52  cfb
    351  * *** empty log message ***
    352  *
    353  * Revision 1.66  1995/07/26  03:25:24  robby
    354  * fixed accesses mutex and updated call to ConfigureCache
    355  *
    356  * Revision 1.65  1995/07/25  14:36:52  rachad
    357  * *** empty log message ***
    358  *
    359  * Revision 1.64  1995/07/21  19:29:05  robby
    360  * added total_accesses
    361  *
    362  * Revision 1.63  1995/07/20  19:43:35  cfb
    363  * *** empty log message ***
    364  *
    365  * Revision 1.62  1995/07/20  16:10:24  rachad
    366  * *** empty log message ***
    367  *
    368  * Revision 1.61  1995/07/20  03:36:53  rachad
    369  * Added suport for cache warming
    370  *
    371  * Revision 1.60  1995/07/17  22:31:31  cfb
    372  * *** empty log message ***
    373  *
    374  * Revision 1.59  1995/07/16  17:02:23  cfb
    375  * *** empty log message ***
    376  *
    377  * Revision 1.58  1995/07/16  15:19:27  cfb
    378  * *** empty log message ***
    379  *
    380  * Revision 1.57  1995/07/16  03:17:01  cfb
    381  * *** empty log message ***
    382  *
    383  * Revision 1.56  1995/07/13  16:11:59  cfb
    384  * *** empty log message ***
    385  *
    386  * Revision 1.55  1995/07/13  15:42:40  cfb
    387  * added cacheDebug variable ...
    388  *
    389  * Revision 1.54  1995/07/13  14:28:27  rachad
    390  * *** empty log message ***
    391  *
    392  * Revision 1.53  1995/07/10  21:48:52  robby
    393  * added virtualStripingWarnings
    394  *
    395  * Revision 1.52  1995/07/10  20:41:13  rachad
    396  * *** empty log message ***
    397  *
    398  * Revision 1.51  1995/07/09  19:46:49  cfb
    399  * Added cache Shutdown
    400  *
    401  * Revision 1.50  1995/07/08  21:38:53  rachad
    402  * Added support for interactive traces
    403  * in the simulator
    404  *
    405  * Revision 1.49  1995/07/08  18:05:39  rachad
    406  * Linked up Claudsons code with the real cache
    407  *
    408  * Revision 1.48  1995/07/07  16:00:22  cfb
    409  * Added initialization of cacheDesc to AllocRaidAccDesc
    410  *
    411  * Revision 1.47  1995/07/06  14:22:37  rachad
    412  * Merge complete
    413  *
    414  * Revision 1.46.50.2  1995/06/21  17:48:30  robby
    415  * test
    416  *
    417  * Revision 1.46.50.1  1995/06/21  17:34:49  robby
    418  * branching to work on "meta-dag" capabilities
    419  *
    420  * Revision 1.46.10.5  1995/07/03  21:58:34  holland
    421  * added support for suppressing both stripe locks & large writes
    422  *
    423  * Revision 1.46.10.4  1995/06/27  03:42:48  holland
    424  * typo fix
    425  *
    426  * Revision 1.46.10.3  1995/06/27  03:31:42  holland
    427  * prototypes
    428  *
    429  * Revision 1.46.10.2  1995/06/27  03:17:57  holland
    430  * fixed callback bug in kernel rf_DoAccess
    431  *
    432  * Revision 1.46.10.1  1995/06/25  14:32:44  holland
    433  * initial checkin on new branch
    434  *
    435  * Revision 1.46  1995/06/13  17:52:41  holland
    436  * added UserStats stuff
    437  *
    438  * Revision 1.45  1995/06/13  16:03:41  rachad
    439  * *** empty log message ***
    440  *
    441  * Revision 1.44  1995/06/12  15:54:40  rachad
    442  * Added garbege collection for log structured storage
    443  *
    444  * Revision 1.43  1995/06/09  18:01:09  holland
    445  * various changes related to in-kernel recon, multiple-row arrays,
    446  * trace extraction from kernel, etc.
    447  *
    448  * Revision 1.42  1995/06/08  19:52:28  rachad
    449  * *** empty log message ***
    450  *
    451  * Revision 1.41  1995/06/08  00:11:49  robby
    452  * added a debug variable -- showVirtualSizeRequirements
    453  *
    454  * Revision 1.40  1995/06/05  00:33:30  holland
    455  * protectedSectors bug fix
    456  *
    457  * Revision 1.39  1995/06/01  22:45:03  holland
    458  * made compilation of parity logging and virtual striping
    459  * stuff conditional on some constants defined in rf_archs.h
    460  *
    461  * Revision 1.38  1995/06/01  21:52:37  holland
    462  * replaced NULL sizes in calls to Free() by -1, and caused this
    463  * to suppress the size-mismatch error
    464  *
    465  * Revision 1.37  1995/05/26  20:04:54  wvcii
    466  * modified parity logging debug vars
    467  *
    468  * Revision 1.36  95/05/21  15:32:41  wvcii
    469  * added debug vars: parityLogDebug, numParityRegions, numParityLogs,
    470  * numReintegrationThreads
    471  *
    472  * Revision 1.35  95/05/19  20:58:21  holland
    473  * cleanups on error cases in rf_DoAccess
    474  *
    475  * Revision 1.34  1995/05/16  17:35:53  holland
    476  * added rf_copyback_in_progress.  this is debug-only.
    477  *
    478  * Revision 1.33  1995/05/15  12:25:35  holland
    479  * bug fix in test code: no stripe locks were getting acquired in RAID0 mode
    480  *
    481  * Revision 1.32  1995/05/10  18:54:12  holland
    482  * bug fixes related to deadlock problem at time of disk failure
    483  * eliminated read-op-write code
    484  * beefed up parity checking in loop test
    485  * various small changes & new ASSERTs
    486  *
    487  * Revision 1.31  1995/05/02  22:49:02  holland
    488  * add shutdown calls for each architecture
    489  *
    490  * Revision 1.30  1995/05/01  14:43:37  holland
    491  * merged changes from Bill
    492  *
    493  * Revision 1.29  1995/05/01  13:28:00  holland
    494  * parity range locks, locking disk requests, recon+parityscan in kernel, etc.
    495  *
    496  * Revision 1.28  1995/04/24  13:25:51  holland
    497  * rewrite to move disk queues, recon, & atomic RMW to kernel
    498  *
    499  * Revision 1.27  1995/04/06  14:47:56  rachad
    500  * merge completed
    501  *
    502  * Revision 1.26  1995/04/03  20:32:35  rachad
    503  * added reconstruction to simulator
    504  *
    505  * Revision 1.25.10.2  1995/04/03  20:41:00  holland
    506  * misc changes related to distributed sparing
    507  *
    508  * Revision 1.25.10.1  1995/03/17  20:04:01  holland
    509  * initial checkin on new branch
    510  *
    511  * Revision 1.25  1995/03/15  20:34:30  holland
    512  * changes for distributed sparing.
    513  *
    514  * Revision 1.24  1995/03/09  19:53:05  rachad
    515  * *** empty log message ***
    516  *
    517  * Revision 1.23  1995/03/03  18:36:16  rachad
    518  *  Simulator mechanism added
    519  *
    520  * Revision 1.22  1995/03/01  20:25:48  holland
    521  * kernelization changes
    522  *
    523  * Revision 1.21  1995/02/17  19:39:56  holland
    524  * added size param to all calls to Free().
    525  * this is ignored at user level, but necessary in the kernel.
    526  *
    527  * Revision 1.20  1995/02/17  13:37:49  holland
    528  * kernelization changes -- not yet complete
    529  *
    530  * Revision 1.19  1995/02/10  18:08:07  holland
    531  * fixed a few things I broke during kernelization
    532  *
    533  * Revision 1.18  1995/02/10  17:34:10  holland
    534  * kernelization changes
    535  *
    536  * Revision 1.17  1995/02/04  15:51:35  holland
    537  * kernelization changes
    538  *
    539  * Revision 1.16  1995/02/03  22:31:36  holland
    540  * many changes related to kernelization
    541  *
    542  * Revision 1.15  1995/02/01  15:13:05  holland
    543  * moved #include of general.h out of raid.h and into each file
    544  *
    545  * Revision 1.14  1995/02/01  14:25:19  holland
    546  * began changes for kernelization:
    547  *      changed all instances of mutex_t and cond_t to DECLARE macros
    548  *      converted configuration code to use config structure
    549  *
    550  * Revision 1.13  1995/01/30  14:53:46  holland
    551  * extensive changes related to making DoIO non-blocking
    552  *
    553  * Revision 1.12  1995/01/25  00:26:21  holland
    554  * eliminated support for aio
    555  *
    556  * Revision 1.11  1995/01/24  23:58:46  holland
    557  * multi-way recon XOR, plus various small changes
    558  *
    559  * Revision 1.10  1995/01/11  19:27:02  holland
    560  * various changes related to performance tuning
    561  *
    562  * Revision 1.9  1994/12/05  15:29:09  holland
    563  * added trace run time limitation (maxTraceRunTimeSec)
    564  *
    565  * Revision 1.8  1994/12/05  04:18:12  holland
    566  * various new control vars in the config file
    567  *
    568  * Revision 1.7  1994/11/29  23:11:36  holland
    569  * tracerec bug on dag retry fixed
    570  *
    571  * Revision 1.6  1994/11/29  22:11:38  danner
    572  * holland updates
    573  *
    574  * Revision 1.5  1994/11/29  21:09:47  danner
    575  * Detailed tracing support (holland).
    576  *
    577  * Revision 1.4  1994/11/29  20:36:02  danner
    578  * Added suppressAtomicRMW option.
    579  *
    580  * Revision 1.3  1994/11/21  15:34:06  danner
    581  * Added ConfigureAllocList() call.
    582  *
    583  */
    584 
    585 #ifdef _KERNEL
    586 #define KERNEL
    587 #endif
    588 
    589 #if defined(__NetBSD__) && defined(_KERNEL)
    590 #include <sys/types.h>
    591 #include <sys/param.h>
    592 #include <sys/systm.h>
    593 #include <sys/ioctl.h>
    594 #include <sys/fcntl.h>
    595 #include <sys/vnode.h>
    596 #endif
    597 
    598 #ifdef KERNEL
    599 #ifndef __NetBSD__
    600 #include <dkusage.h>
    601 #include <dfstrace.h>
    602 #endif /* !__NetBSD__ */
    603 #endif /* KERNEL */
    604 
    605 #include "rf_archs.h"
    606 #include "rf_threadstuff.h"
    607 
    608 #ifndef KERNEL
    609 #include <stdio.h>
    610 #include <stdlib.h>
    611 #endif /* KERNEL */
    612 
    613 #include <sys/errno.h>
    614 
    615 #include "rf_raid.h"
    616 #include "rf_dag.h"
    617 #include "rf_aselect.h"
    618 #include "rf_diskqueue.h"
    619 #include "rf_parityscan.h"
    620 #include "rf_alloclist.h"
    621 #include "rf_threadid.h"
    622 #include "rf_dagutils.h"
    623 #include "rf_utils.h"
    624 #include "rf_etimer.h"
    625 #include "rf_acctrace.h"
    626 #include "rf_configure.h"
    627 #include "rf_general.h"
    628 #include "rf_desc.h"
    629 #include "rf_states.h"
    630 #include "rf_freelist.h"
    631 #include "rf_decluster.h"
    632 #include "rf_map.h"
    633 #include "rf_diskthreads.h"
    634 #include "rf_revent.h"
    635 #include "rf_callback.h"
    636 #include "rf_engine.h"
    637 #include "rf_memchunk.h"
    638 #include "rf_mcpair.h"
    639 #include "rf_nwayxor.h"
    640 #include "rf_debugprint.h"
    641 #include "rf_copyback.h"
    642 #if !defined(__NetBSD__)
    643 #include "rf_camlayer.h"
    644 #endif
    645 #include "rf_driver.h"
    646 #include "rf_options.h"
    647 #include "rf_shutdown.h"
    648 #include "rf_sys.h"
    649 #include "rf_cpuutil.h"
    650 
    651 #ifdef SIMULATE
    652 #include "rf_diskevent.h"
    653 #endif /* SIMULATE */
    654 
    655 #ifdef KERNEL
    656 #include <sys/buf.h>
    657 #ifndef __NetBSD__
    658 #include <io/common/devdriver.h>
    659 #endif /* !__NetBSD__ */
    660 
    661 #if DFSTRACE > 0
    662 #include <sys/dfs_log.h>
    663 #include <sys/dfstracebuf.h>
    664 #endif /* DFSTRACE > 0 */
    665 
    666 #if DKUSAGE > 0
    667 #include <sys/dkusage.h>
    668 #include <io/common/iotypes.h>
    669 #include <io/cam/dec_cam.h>
    670 #include <io/cam/cam.h>
    671 #include <io/cam/pdrv.h>
    672 #endif /* DKUSAGE > 0 */
    673 #endif /* KERNEL */
    674 
    675 #if RF_DEMO > 0
    676 #include "rf_demo.h"
    677 #endif /* RF_DEMO > 0 */
    678 
    679 /* rad == RF_RaidAccessDesc_t */
    680 static RF_FreeList_t *rf_rad_freelist;
    681 #define RF_MAX_FREE_RAD 128
    682 #define RF_RAD_INC       16
    683 #define RF_RAD_INITIAL   32
    684 
    685 /* debug variables */
    686 char rf_panicbuf[2048];       /* a buffer to hold an error msg when we panic */
    687 
    688 /* main configuration routines */
    689 static int raidframe_booted = 0;
    690 
    691 static void rf_ConfigureDebug(RF_Config_t *cfgPtr);
    692 static void set_debug_option(char *name, long val);
    693 static void rf_UnconfigureArray(void);
    694 static int init_rad(RF_RaidAccessDesc_t *);
    695 static void clean_rad(RF_RaidAccessDesc_t *);
    696 static void rf_ShutdownRDFreeList(void *);
    697 static int rf_ConfigureRDFreeList(RF_ShutdownList_t **);
    698 
    699 
    700 RF_DECLARE_MUTEX(rf_printf_mutex)          /* debug only:  avoids interleaved printfs by different stripes */
    701 RF_DECLARE_GLOBAL_THREADID                 /* declarations for threadid.h */
    702 
    703 #if !defined(KERNEL) && !defined(SIMULATE)
    704 static int rf_InitThroughputStats(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, RF_Config_t *cfgPtr);
    705 static void rf_StopThroughputStats(RF_Raid_t *raidPtr);
    706 static void rf_PrintThroughputStats(RF_Raid_t *raidPtr);
    707 #endif /* !KERNEL && !SIMULATE */
    708 
/*
 * Environment-specific helpers:
 *
 *   SIGNAL_QUIESCENT_COND(raid)  wake whoever waits for the array to quiesce
 *   WAIT_FOR_QUIESCENCE(raid)    sleep until that wakeup is delivered
 *   IO_BUF_ERR(bp, err, unit)    fail a struct buf request with error `err'
 *
 * All three WAIT_FOR_QUIESCENCE variants expand to a single expression
 * with no trailing semicolon; the caller supplies it.
 */
#ifdef KERNEL
#define SIGNAL_QUIESCENT_COND(_raid_)  wakeup(&((_raid_)->accesses_suspended))
#ifndef __NetBSD__
#define WAIT_FOR_QUIESCENCE(_raid_) \
  mpsleep(&((_raid_)->accesses_suspended), PZERO, "raidframe quiesce", 0, \
      (void *) simple_lock_addr((_raid_)->access_suspend_mutex), MS_LOCK_SIMPLE)
#else
/*
 * No ';' at the end of the expansion: a terminating semicolon inside the
 * macro would turn "if (c) WAIT_FOR_QUIESCENCE(r); else ..." into a syntax
 * error and would differ from the other definitions of this macro.
 */
#define WAIT_FOR_QUIESCENCE(_raid_) \
	tsleep(&((_raid_)->accesses_suspended),PRIBIO|PCATCH,"raidframe quiesce", 0)

#endif
/*
 * IO_BUF_ERR marks the buffer as failed, reports the whole request as
 * unfinished (b_resid = b_bcount), records the error code, closes the
 * disk-usage accounting via RF_DKU_END_IO and completes the I/O with
 * biodone().  The bp and err arguments are parenthesized so that
 * arbitrary expressions can be passed safely.
 */
#if DKUSAGE > 0
#define IO_BUF_ERR(bp, err, unit) { \
	(bp)->b_flags |= B_ERROR; \
	(bp)->b_resid = (bp)->b_bcount; \
	(bp)->b_error = (err); \
	RF_DKU_END_IO(unit, bp); \
	biodone(bp); \
}
#else
#define IO_BUF_ERR(bp, err, unit) { \
	(bp)->b_flags |= B_ERROR; \
	(bp)->b_resid = (bp)->b_bcount; \
	(bp)->b_error = (err); \
	RF_DKU_END_IO(unit); \
	biodone(bp); \
}
#endif /* DKUSAGE > 0 */
#else /* KERNEL */

/* User level / simulator: condition-variable based, and no struct buf to fail. */
#define SIGNAL_QUIESCENT_COND(_raid_)  RF_SIGNAL_COND((_raid_)->quiescent_cond)
#define WAIT_FOR_QUIESCENCE(_raid_)    RF_WAIT_COND((_raid_)->quiescent_cond, (_raid_)->access_suspend_mutex)
#define IO_BUF_ERR(bp, err, unit)

#endif /* KERNEL */
    744 
    745 static int configureCount=0;         /* number of active configurations */
    746 static int isconfigged=0;            /* is basic raidframe (non per-array) stuff configged */
    747 RF_DECLARE_STATIC_MUTEX(configureMutex) /* used to lock the configuration stuff */
    748 
    749 static RF_ShutdownList_t *globalShutdown; /* non array-specific stuff */
    750 
    751 static int rf_ConfigureRDFreeList(RF_ShutdownList_t **listp);
    752 
    753 /* called at system boot time */
    754 int rf_BootRaidframe()
    755 {
    756 #if 0
    757   long stacksize;
    758 #endif
    759   int rc;
    760 
    761   if (raidframe_booted)
    762     return(EBUSY);
    763   raidframe_booted = 1;
    764 
    765 #if RF_DEBUG_ATOMIC > 0
    766   rf_atent_init();
    767 #endif /* RF_DEBUG_ATOMIC > 0 */
    768 
    769   rf_setup_threadid();
    770   rf_assign_threadid();
    771 
    772 #if !defined(KERNEL) && !defined(SIMULATE)
    773   if (RF_THREAD_ATTR_CREATE(raidframe_attr_default)) {
    774     fprintf(stderr, "Unable to create default thread attr\n");
    775     exit(1);
    776   }
    777 #if 0
    778   stacksize = RF_THREAD_ATTR_GETSTACKSIZE(raidframe_attr_default);
    779   if (stacksize < 0) {
    780     fprintf(stderr, "Unable to get stack size of default thread attr\n");
    781     exit(1);
    782   }
    783   stacksize += 16384;
    784   rc = RF_THREAD_ATTR_SETSTACKSIZE(raidframe_attr_default, stacksize);
    785   if (rc) {
    786     fprintf(stderr, "Unable to set stack size of default thread attr\n");
    787     exit(1);
    788   }
    789 #endif /* 0 */
    790 #endif /* !KERNEL && !SIMULATE */
    791   rc = rf_mutex_init(&configureMutex);
    792   if (rc) {
    793     RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
    794       __LINE__, rc);
    795     RF_PANIC();
    796   }
    797   configureCount = 0;
    798   isconfigged = 0;
    799   globalShutdown = NULL;
    800   return(0);
    801 }
    802 
    803 /*
    804  * This function is really just for debugging user-level stuff: it
    805  * frees up all memory, other RAIDframe resources which might otherwise
    806  * be kept around. This is used with systems like "sentinel" to detect
    807  * memory leaks.
    808  */
    809 int rf_UnbootRaidframe()
    810 {
    811 	int rc;
    812 
    813 	RF_LOCK_MUTEX(configureMutex);
    814 	if (configureCount) {
    815 		RF_UNLOCK_MUTEX(configureMutex);
    816 		return(EBUSY);
    817 	}
    818 	raidframe_booted = 0;
    819 	RF_UNLOCK_MUTEX(configureMutex);
    820 	rc = rf_mutex_destroy(&configureMutex);
    821 	if (rc) {
    822 		RF_ERRORMSG3("Unable to destroy mutex file %s line %d rc=%d\n", __FILE__,
    823 			__LINE__, rc);
    824 		RF_PANIC();
    825 	}
    826 #if RF_DEBUG_ATOMIC > 0
    827 	rf_atent_shutdown();
    828 #endif /* RF_DEBUG_ATOMIC > 0 */
    829 	return(0);
    830 }
    831 
    832 /*
    833  * Called whenever an array is shutdown
    834  */
    835 static void rf_UnconfigureArray()
    836 {
    837   int rc;
    838 
    839   RF_LOCK_MUTEX(configureMutex);
    840   if (--configureCount == 0) {              /* if no active configurations, shut everything down */
    841     isconfigged = 0;
    842 
    843     rc = rf_ShutdownList(&globalShutdown);
    844     if (rc) {
    845       RF_ERRORMSG1("RAIDFRAME: unable to do global shutdown, rc=%d\n", rc);
    846     }
    847 
    848     rf_shutdown_threadid();
    849 
    850     /*
    851      * We must wait until now, because the AllocList module
    852      * uses the DebugMem module.
    853      */
    854     if (rf_memDebug)
    855       rf_print_unfreed();
    856   }
    857   RF_UNLOCK_MUTEX(configureMutex);
    858 }
    859 
    860 /*
    861  * Called to shut down an array.
    862  */
    863 int rf_Shutdown(raidPtr)
    864   RF_Raid_t   *raidPtr;
    865 {
    866 #if !defined(__NetBSD) && !defined(_KERNEL)
    867   int rc;
    868 #endif
    869   int r,c;
    870 
    871 #if defined(__NetBSD__) && defined(_KERNEL)
    872   struct proc *p;
    873 #endif
    874 
    875   if (!raidPtr->valid) {
    876     RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe driver.  Aborting shutdown\n");
    877     return(EINVAL);
    878   }
    879 
    880   /*
    881    * wait for outstanding IOs to land
    882    * As described in rf_raid.h, we use the rad_freelist lock
    883    * to protect the per-array info about outstanding descs
    884    * since we need to do freelist locking anyway, and this
    885    * cuts down on the amount of serialization we've got going
    886    * on.
    887    */
    888   RF_FREELIST_DO_LOCK(rf_rad_freelist);
    889   if (raidPtr->waitShutdown) {
    890     RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
    891     return(EBUSY);
    892   }
    893   raidPtr->waitShutdown = 1;
    894   while (raidPtr->nAccOutstanding) {
    895     RF_WAIT_COND(raidPtr->outstandingCond, RF_FREELIST_MUTEX_OF(rf_rad_freelist));
    896   }
    897   RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
    898 
    899 #if !defined(KERNEL) && !defined(SIMULATE)
    900   rf_PrintThroughputStats(raidPtr);
    901 #endif /* !KERNEL && !SIMULATE */
    902 
    903   raidPtr->valid = 0;
    904 
    905 #if !defined(KERNEL) && !defined(SIMULATE)
    906   rf_TerminateDiskQueues(raidPtr);           /* tell all disk queues to release any waiting threads */
    907   rf_ShutdownDiskThreads(raidPtr);           /* wait for all threads to exit */
    908 #endif /* !KERNEL && !SIMULATE */
    909 
    910 #if defined(__NetBSD__) && defined(_KERNEL)
    911   /* We take this opportunity to close the vnodes like we should.. */
    912 
    913   p = raidPtr->proc; /* XXX */
    914 
    915   for(r=0;r<raidPtr->numRow;r++) {
    916 	  for(c=0;c<raidPtr->numCol;c++) {
    917 		  printf("Closing vnode for row: %d col: %d\n",r,c);
    918 		  if (raidPtr->raid_cinfo[r][c].ci_vp) {
    919 			  (void)vn_close(raidPtr->raid_cinfo[r][c].ci_vp,
    920 					 FREAD|FWRITE,  p->p_ucred, p);
    921 		  } else {
    922 			  printf("vnode was NULL\n");
    923 		  }
    924 
    925 	  }
    926   }
    927   for(r=0;r<raidPtr->numSpare;r++) {
    928 	  printf("Closing vnode for spare: %d\n",r);
    929 	  if (raidPtr->raid_cinfo[0][raidPtr->numCol+r].ci_vp) {
    930 		  (void)vn_close(raidPtr->raid_cinfo[0][raidPtr->numCol+r].ci_vp,
    931 				 FREAD|FWRITE,  p->p_ucred, p);
    932 	  } else {
    933 		  printf("vnode was NULL\n");
    934 	  }
    935   }
    936 
    937 
    938 #endif
    939 
    940   rf_ShutdownList(&raidPtr->shutdownList);
    941 
    942   rf_UnconfigureArray();
    943 
    944   return(0);
    945 }
    946 
/*
 * Run one global (non array-specific) configuration routine `f`, passing
 * it the global shutdown list.  On failure: log the error, run the global
 * shutdown list, drop the caller's configureCount reference, release
 * configureMutex and return the error from the enclosing function.
 *
 * Expects `rc` to be in scope and configureMutex to be held.  Wrapped in
 * do/while(0) so that `DO_INIT_CONFIGURE(f);` is a single statement.
 */
#define DO_INIT_CONFIGURE(f) do { \
	rc = f (&globalShutdown); \
	if (rc) { \
		RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \
		rf_ShutdownList(&globalShutdown); \
		configureCount--; \
		RF_UNLOCK_MUTEX(configureMutex); \
		return(rc); \
	} \
} while (0)
    957 
/*
 * Undo a partially completed per-array configuration: run the array's
 * shutdown list and drop its reference on the global configuration.
 * Expects `raidPtr` to be in scope.  Wrapped in do/while(0) so that
 * `DO_RAID_FAIL();` is a single statement.
 */
#define DO_RAID_FAIL() do { \
	rf_ShutdownList(&raidPtr->shutdownList); \
	rf_UnconfigureArray(); \
} while (0)
    962 
/*
 * Run one per-array configuration routine `f`.  On failure: log the
 * error, undo the array configuration done so far (DO_RAID_FAIL) and
 * return the error from the enclosing function.
 *
 * Expects `rc`, `raidPtr` and `cfgPtr` to be in scope.  Wrapped in
 * do/while(0) so that the invocation is a single statement.
 */
#define DO_RAID_INIT_CONFIGURE(f) do { \
	rc = f (&raidPtr->shutdownList, raidPtr, cfgPtr); \
	if (rc) { \
		RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \
		DO_RAID_FAIL(); \
		return(rc); \
	} \
} while (0)
    971 
/*
 * Create a mutex whose destruction is registered on the array's shutdown
 * list.  On failure: log, undo the array configuration done so far and
 * return the error from the enclosing function.
 *
 * Expects `rc` and `raidPtr` to be in scope.  Wrapped in do/while(0) so
 * that the invocation is a single statement.
 */
#define DO_RAID_MUTEX(_m_) do { \
	rc = rf_create_managed_mutex(&raidPtr->shutdownList, (_m_)); \
	if (rc) { \
		RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", \
			__FILE__, __LINE__, rc); \
		DO_RAID_FAIL(); \
		return(rc); \
	} \
} while (0)
    981 
/*
 * Create a condition variable whose destruction is registered on the
 * array's shutdown list.  On failure: log, undo the array configuration
 * done so far and return the error from the enclosing function.
 *
 * Expects `rc` and `raidPtr` to be in scope.  Wrapped in do/while(0) so
 * that the invocation is a single statement.
 */
#define DO_RAID_COND(_c_) do { \
	rc = rf_create_managed_cond(&raidPtr->shutdownList, (_c_)); \
	if (rc) { \
		RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", \
			__FILE__, __LINE__, rc); \
		DO_RAID_FAIL(); \
		return(rc); \
	} \
} while (0)
    991 
    992 int rf_Configure(raidPtr, cfgPtr)
    993   RF_Raid_t    *raidPtr;
    994   RF_Config_t  *cfgPtr;
    995 {
    996   RF_RowCol_t row, col;
    997   int i, rc;
    998   int unit;
    999   struct proc *p;
   1000 
   1001   if (raidPtr->valid) {
   1002     RF_ERRORMSG("RAIDframe configuration not shut down.  Aborting configure.\n");
   1003     return(EINVAL);
   1004   }
   1005 
   1006   RF_LOCK_MUTEX(configureMutex);
   1007   configureCount++;
   1008   if (isconfigged == 0) {
   1009     rc = rf_create_managed_mutex(&globalShutdown, &rf_printf_mutex);
   1010     if (rc) {
   1011       RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
   1012         __LINE__, rc);
   1013       rf_ShutdownList(&globalShutdown);
   1014       return(rc);
   1015     }
   1016 
   1017     /* initialize globals */
   1018     printf("RAIDFRAME: protectedSectors is %ld\n",rf_protectedSectors);
   1019 
   1020     rf_clear_debug_print_buffer();
   1021 
   1022     DO_INIT_CONFIGURE(rf_ConfigureAllocList);
   1023     DO_INIT_CONFIGURE(rf_ConfigureEtimer);
   1024     /*
   1025      * Yes, this does make debugging general to the whole system instead
   1026      * of being array specific. Bummer, drag.
   1027      */
   1028     rf_ConfigureDebug(cfgPtr);
   1029     DO_INIT_CONFIGURE(rf_ConfigureDebugMem);
   1030 #ifdef SIMULATE
   1031     rf_default_disk_names();
   1032     DO_INIT_CONFIGURE(rf_DDEventInit);
   1033 #endif /* SIMULATE */
   1034     DO_INIT_CONFIGURE(rf_ConfigureAccessTrace);
   1035     DO_INIT_CONFIGURE(rf_ConfigureMapModule);
   1036     DO_INIT_CONFIGURE(rf_ConfigureReconEvent);
   1037     DO_INIT_CONFIGURE(rf_ConfigureCallback);
   1038     DO_INIT_CONFIGURE(rf_ConfigureMemChunk);
   1039     DO_INIT_CONFIGURE(rf_ConfigureRDFreeList);
   1040     DO_INIT_CONFIGURE(rf_ConfigureNWayXor);
   1041     DO_INIT_CONFIGURE(rf_ConfigureStripeLockFreeList);
   1042     DO_INIT_CONFIGURE(rf_ConfigureMCPair);
   1043 #ifndef SIMULATE
   1044 #if !defined(__NetBSD__)
   1045     DO_INIT_CONFIGURE(rf_ConfigureCamLayer);
   1046 #endif
   1047 #endif /* !SIMULATE */
   1048     DO_INIT_CONFIGURE(rf_ConfigureDAGs);
   1049     DO_INIT_CONFIGURE(rf_ConfigureDAGFuncs);
   1050     DO_INIT_CONFIGURE(rf_ConfigureDebugPrint);
   1051     DO_INIT_CONFIGURE(rf_ConfigureReconstruction);
   1052     DO_INIT_CONFIGURE(rf_ConfigureCopyback);
   1053     DO_INIT_CONFIGURE(rf_ConfigureDiskQueueSystem);
   1054     DO_INIT_CONFIGURE(rf_ConfigureCpuMonitor);
   1055     isconfigged = 1;
   1056   }
   1057   RF_UNLOCK_MUTEX(configureMutex);
   1058 
   1059   /*
   1060    * Null out the entire raid descriptor to avoid problems when we reconfig.
   1061    * This also clears the valid bit.
   1062    */
   1063   /* XXX this clearing should be moved UP to outside of here.... that, or
   1064      rf_Configure() needs to take more arguments... XXX */
   1065 #if defined(__NetBSD__) && defined(_KERNEL)
   1066   unit = raidPtr->raidid;
   1067   p = raidPtr->proc;   /* XXX save these... */
   1068 #endif
   1069   bzero((char *)raidPtr, sizeof(RF_Raid_t));
   1070 #if defined(__NetBSD__) && defined(_KERNEL)
   1071   raidPtr->raidid = unit;
   1072   raidPtr->proc = p;   /* XXX and then recover them..*/
   1073 #endif
   1074   DO_RAID_MUTEX(&raidPtr->mutex);
   1075   /* set up the cleanup list.  Do this after ConfigureDebug so that value of memDebug will be set */
   1076 
   1077   rf_MakeAllocList(raidPtr->cleanupList);
   1078   if (raidPtr->cleanupList == NULL) {
   1079     DO_RAID_FAIL();
   1080     return(ENOMEM);
   1081   }
   1082 
   1083   rc = rf_ShutdownCreate(&raidPtr->shutdownList,
   1084 			 (void (*)(void *))rf_FreeAllocList,
   1085 			 raidPtr->cleanupList);
   1086   if (rc) {
   1087     RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
   1088       __FILE__, __LINE__, rc);
   1089     DO_RAID_FAIL();
   1090     return(rc);
   1091   }
   1092 
   1093   raidPtr->numRow = cfgPtr->numRow;
   1094   raidPtr->numCol = cfgPtr->numCol;
   1095   raidPtr->numSpare = cfgPtr->numSpare;
   1096 
   1097 #if defined(__NetBSD__) && defined(_KERNEL)
   1098   /* XXX we don't even pretend to support more than one row
   1099      in the kernel... */
   1100   if (raidPtr->numRow != 1) {
   1101 	  RF_ERRORMSG("Only one row supported in kernel.\n");
   1102 	  DO_RAID_FAIL();
   1103 	  return(EINVAL);
   1104   }
   1105 #endif
   1106 
   1107 
   1108 
   1109   RF_CallocAndAdd(raidPtr->status, raidPtr->numRow, sizeof(RF_RowStatus_t),
   1110     (RF_RowStatus_t *), raidPtr->cleanupList);
   1111   if (raidPtr->status == NULL) {
   1112     DO_RAID_FAIL();
   1113     return(ENOMEM);
   1114   }
   1115 
   1116   RF_CallocAndAdd(raidPtr->reconControl, raidPtr->numRow,
   1117     sizeof(RF_ReconCtrl_t *), (RF_ReconCtrl_t **), raidPtr->cleanupList);
   1118   if (raidPtr->reconControl == NULL) {
   1119     DO_RAID_FAIL();
   1120     return(ENOMEM);
   1121   }
   1122   for (i=0; i<raidPtr->numRow; i++) {
   1123     raidPtr->status[i] = rf_rs_optimal;
   1124     raidPtr->reconControl[i] = NULL;
   1125   }
   1126 
   1127   DO_RAID_INIT_CONFIGURE(rf_ConfigureEngine);
   1128 #if !defined(KERNEL) && !defined(SIMULATE)
   1129   DO_RAID_INIT_CONFIGURE(rf_InitThroughputStats);
   1130 #endif /* !KERNEL && !SIMULATE */
   1131 
   1132   DO_RAID_INIT_CONFIGURE(rf_ConfigureStripeLocks);
   1133 
   1134   DO_RAID_COND(&raidPtr->outstandingCond);
   1135 
   1136   raidPtr->nAccOutstanding = 0;
   1137   raidPtr->waitShutdown = 0;
   1138 
   1139   DO_RAID_MUTEX(&raidPtr->access_suspend_mutex);
   1140   DO_RAID_COND(&raidPtr->quiescent_cond);
   1141 
   1142   DO_RAID_COND(&raidPtr->waitForReconCond);
   1143 
   1144   DO_RAID_MUTEX(&raidPtr->recon_done_proc_mutex);
   1145   DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks);
   1146   DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks);
   1147   /* do this after ConfigureDisks & ConfigureSpareDisks to be sure dev no. is set */
   1148   DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues);
   1149 #ifndef KERNEL
   1150   DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskThreads);
   1151 #endif /* !KERNEL */
   1152 
   1153   DO_RAID_INIT_CONFIGURE(rf_ConfigureLayout);
   1154 
   1155   DO_RAID_INIT_CONFIGURE(rf_ConfigurePSStatus);
   1156 
   1157   for(row=0;row<raidPtr->numRow;row++) {
   1158     for(col=0;col<raidPtr->numCol;col++) {
   1159       /*
   1160        * XXX better distribution
   1161        */
   1162       raidPtr->hist_diskreq[row][col] = 0;
   1163     }
   1164   }
   1165 
   1166   if (rf_keepAccTotals) {
   1167     raidPtr->keep_acc_totals = 1;
   1168   }
   1169 
   1170   rf_StartUserStats(raidPtr);
   1171 
   1172   raidPtr->valid = 1;
   1173   return(0);
   1174 }
   1175 
   1176 static int init_rad(desc)
   1177   RF_RaidAccessDesc_t  *desc;
   1178 {
   1179 	int rc;
   1180 
   1181 	rc = rf_mutex_init(&desc->mutex);
   1182 	if (rc) {
   1183 		RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
   1184 			__LINE__, rc);
   1185 		return(rc);
   1186 	}
   1187 	rc = rf_cond_init(&desc->cond);
   1188 	if (rc) {
   1189 		RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d\n", __FILE__,
   1190 			__LINE__, rc);
   1191 		rf_mutex_destroy(&desc->mutex);
   1192 		return(rc);
   1193 	}
   1194 	return(0);
   1195 }
   1196 
   1197 static void clean_rad(desc)
   1198   RF_RaidAccessDesc_t  *desc;
   1199 {
   1200 	rf_mutex_destroy(&desc->mutex);
   1201 	rf_cond_destroy(&desc->cond);
   1202 }
   1203 
   1204 static void rf_ShutdownRDFreeList(ignored)
   1205   void  *ignored;
   1206 {
   1207 	RF_FREELIST_DESTROY_CLEAN(rf_rad_freelist,next,(RF_RaidAccessDesc_t *),clean_rad);
   1208 }
   1209 
   1210 static int rf_ConfigureRDFreeList(listp)
   1211   RF_ShutdownList_t **listp;
   1212 {
   1213 	int rc;
   1214 
   1215 	RF_FREELIST_CREATE(rf_rad_freelist, RF_MAX_FREE_RAD,
   1216 		RF_RAD_INC, sizeof(RF_RaidAccessDesc_t));
   1217 	if (rf_rad_freelist == NULL) {
   1218 		return(ENOMEM);
   1219 	}
   1220 	rc = rf_ShutdownCreate(listp, rf_ShutdownRDFreeList, NULL);
   1221 	if (rc) {
   1222 		RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", __FILE__,
   1223 			__LINE__, rc);
   1224 		rf_ShutdownRDFreeList(NULL);
   1225 		return(rc);
   1226 	}
   1227 	RF_FREELIST_PRIME_INIT(rf_rad_freelist, RF_RAD_INITIAL,next,
   1228 		(RF_RaidAccessDesc_t *),init_rad);
   1229 	return(0);
   1230 }
   1231 
   1232 RF_RaidAccessDesc_t *rf_AllocRaidAccDesc(
   1233   RF_Raid_t                    *raidPtr,
   1234   RF_IoType_t                   type,
   1235   RF_RaidAddr_t                 raidAddress,
   1236   RF_SectorCount_t              numBlocks,
   1237   caddr_t                       bufPtr,
   1238   void                         *bp,
   1239   RF_DagHeader_t              **paramDAG,
   1240   RF_AccessStripeMapHeader_t  **paramASM,
   1241   RF_RaidAccessFlags_t          flags,
   1242   void                        (*cbF)(struct buf *),
   1243   void                         *cbA,
   1244   RF_AccessState_t             *states)
   1245 {
   1246   RF_RaidAccessDesc_t *desc;
   1247 
   1248   RF_FREELIST_GET_INIT_NOUNLOCK(rf_rad_freelist,desc,next,(RF_RaidAccessDesc_t *),init_rad);
   1249   if (raidPtr->waitShutdown) {
   1250     /*
   1251      * Actually, we're shutting the array down. Free the desc
   1252      * and return NULL.
   1253      */
   1254     RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
   1255     RF_FREELIST_FREE_CLEAN(rf_rad_freelist,desc,next,clean_rad);
   1256     return(NULL);
   1257   }
   1258   raidPtr->nAccOutstanding++;
   1259   RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
   1260 
   1261   desc->raidPtr     = (void*)raidPtr;
   1262   desc->type        = type;
   1263   desc->raidAddress = raidAddress;
   1264   desc->numBlocks   = numBlocks;
   1265   desc->bufPtr      = bufPtr;
   1266   desc->bp          = bp;
   1267   desc->paramDAG    = paramDAG;
   1268   desc->paramASM    = paramASM;
   1269   desc->flags       = flags;
   1270   desc -> states    = states;
   1271   desc -> state     = 0;
   1272 
   1273   desc->status      = 0;
   1274   bzero((char *)&desc->tracerec, sizeof(RF_AccTraceEntry_t));
   1275   desc->callbackFunc= (void (*)(RF_CBParam_t))cbF; /* XXX */
   1276   desc->callbackArg = cbA;
   1277   desc->next        = NULL;
   1278   desc->head	    = desc;
   1279   desc->numPending  = 0;
   1280   desc->cleanupList = NULL;
   1281   rf_MakeAllocList(desc->cleanupList);
   1282   rf_get_threadid(desc->tid);
   1283 #ifdef SIMULATE
   1284   desc->owner = rf_GetCurrentOwner();
   1285 #endif /* SIMULATE */
   1286   return(desc);
   1287 }
   1288 
   1289 void rf_FreeRaidAccDesc(RF_RaidAccessDesc_t *desc)
   1290 {
   1291   RF_Raid_t *raidPtr = desc->raidPtr;
   1292 
   1293   RF_ASSERT(desc);
   1294 
   1295 #if !defined(KERNEL) && !defined(SIMULATE)
   1296   rf_StopThroughputStats(raidPtr);
   1297 #endif /* !KERNEL && !SIMULATE */
   1298 
   1299   rf_FreeAllocList(desc->cleanupList);
   1300   RF_FREELIST_FREE_CLEAN_NOUNLOCK(rf_rad_freelist,desc,next,clean_rad);
   1301     raidPtr->nAccOutstanding--;
   1302     if (raidPtr->waitShutdown) {
   1303       RF_SIGNAL_COND(raidPtr->outstandingCond);
   1304     }
   1305   RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
   1306 }
   1307 
   1308 #ifdef JIMZ
   1309 #define THREAD_NUMDESC 1024
   1310 #define THREAD_NUM 600
   1311 static RF_RaidAccessDesc_t *dest_hist[THREAD_NUM*THREAD_NUMDESC];
   1312 int jimz_access_num[THREAD_NUM];
   1313 #endif /* JIMZ */
   1314 
   1315 /*********************************************************************
   1316  * Main routine for performing an access.
   1317  * Accesses are retried until a DAG can not be selected.  This occurs
   1318  * when either the DAG library is incomplete or there are too many
   1319  * failures in a parity group.
   1320  ********************************************************************/
   1321 int rf_DoAccess(
   1322   RF_Raid_t                    *raidPtr,
   1323   RF_IoType_t                   type,
   1324   int                           async_flag,
   1325   RF_RaidAddr_t                 raidAddress,
   1326   RF_SectorCount_t              numBlocks,
   1327   caddr_t                       bufPtr,
   1328   void                         *bp_in,
   1329   RF_DagHeader_t              **paramDAG,
   1330   RF_AccessStripeMapHeader_t  **paramASM,
   1331   RF_RaidAccessFlags_t          flags,
   1332   RF_RaidAccessDesc_t         **paramDesc,
   1333   void                        (*cbF)(struct buf *),
   1334   void                         *cbA)
   1335 /*
   1336 type should be read or write
   1337 async_flag should be RF_TRUE or RF_FALSE
   1338 bp_in is a buf pointer.  void * to facilitate ignoring it outside the kernel
   1339 */
   1340 {
   1341   int tid;
   1342   RF_RaidAccessDesc_t *desc;
   1343   caddr_t lbufPtr = bufPtr;
   1344 #ifdef KERNEL
   1345   struct buf *bp = (struct buf *) bp_in;
   1346 #if DFSTRACE > 0
   1347   struct { RF_uint64 raidAddr; int numBlocks; char type;} dfsrecord;
   1348 #endif /* DFSTRACE > 0 */
   1349 #else /* KERNEL */
   1350   void *bp = bp_in;
   1351 #endif /* KERNEL */
   1352 
   1353   raidAddress += rf_raidSectorOffset;
   1354 
   1355   if (!raidPtr->valid) {
   1356     RF_ERRORMSG("RAIDframe driver not successfully configured.  Rejecting access.\n");
   1357     IO_BUF_ERR(bp, EINVAL, raidPtr->raidid);
   1358     return(EINVAL);
   1359   }
   1360 
   1361 #if defined(KERNEL) && DFSTRACE > 0
   1362   if (rf_DFSTraceAccesses) {
   1363     dfsrecord.raidAddr  = raidAddress;
   1364     dfsrecord.numBlocks = numBlocks;
   1365     dfsrecord.type      = type;
   1366     dfs_log(DFS_NOTE, (char *) &dfsrecord, sizeof(dfsrecord), 0);
   1367   }
   1368 #endif /* KERNEL && DFSTRACE > 0 */
   1369 
   1370   rf_get_threadid(tid);
   1371   if (rf_accessDebug) {
   1372 
   1373 	  printf("logBytes is: %d %d %d\n",raidPtr->raidid,
   1374 		 raidPtr->logBytesPerSector,
   1375 		 (int)rf_RaidAddressToByte(raidPtr,numBlocks));
   1376     printf("[%d] %s raidAddr %d (stripeid %d-%d) numBlocks %d (%d bytes) buf 0x%lx\n",tid,
   1377 	   (type==RF_IO_TYPE_READ) ? "READ":"WRITE", (int)raidAddress,
   1378 	   (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress),
   1379 	   (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress+numBlocks-1),
   1380 	   (int) numBlocks,
   1381 	   (int) rf_RaidAddressToByte(raidPtr,numBlocks),
   1382 	   (long) bufPtr);
   1383   }
   1384 
   1385   if (raidAddress + numBlocks > raidPtr->totalSectors) {
   1386 
   1387     printf("DoAccess: raid addr %lu too large to access %lu sectors.  Max legal addr is %lu\n",
   1388            (u_long)raidAddress,(u_long)numBlocks,(u_long)raidPtr->totalSectors);
   1389 
   1390 #ifdef KERNEL
   1391     if (type == RF_IO_TYPE_READ) {
   1392       IO_BUF_ERR(bp, ENOSPC, raidPtr->raidid);
   1393       return(ENOSPC);
   1394     } else {
   1395       IO_BUF_ERR(bp, ENOSPC, raidPtr->raidid);
   1396       return(ENOSPC);
   1397     }
   1398 #elif defined(SIMULATE)
   1399     RF_PANIC();
   1400 #else /* SIMULATE */
   1401     return(EIO);
   1402 #endif /* SIMULATE */
   1403   }
   1404 
   1405 #if !defined(KERNEL) && !defined(SIMULATE)
   1406   rf_StartThroughputStats(raidPtr);
   1407 #endif /* !KERNEL && !SIMULATE */
   1408 
   1409   desc = rf_AllocRaidAccDesc(raidPtr, type, raidAddress,
   1410 			  numBlocks, lbufPtr, bp, paramDAG, paramASM,
   1411 			  flags, cbF, cbA, raidPtr->Layout.map->states);
   1412 
   1413   if (desc == NULL) {
   1414     return(ENOMEM);
   1415   }
   1416 #ifdef JIMZ
   1417   dest_hist[(tid*THREAD_NUMDESC)+jimz_access_num[tid]]; jimz_access_num[tid]++;
   1418 #endif /* JIMZ */
   1419 
   1420   RF_ETIMER_START(desc->tracerec.tot_timer);
   1421 
   1422 #ifdef SIMULATE
   1423   /* simulator uses paramDesc to continue dag from test function */
   1424   desc->async_flag=async_flag;
   1425 
   1426   *paramDesc=desc;
   1427 
   1428   return(0);
   1429 #endif /* SIMULATE */
   1430 
   1431   rf_ContinueRaidAccess(desc);
   1432 
   1433 #ifndef KERNEL
   1434   if (!(flags & RF_DAG_NONBLOCKING_IO)) {
   1435     RF_LOCK_MUTEX(desc->mutex);
   1436     while (!(desc->flags & RF_DAG_ACCESS_COMPLETE)) {
   1437       RF_WAIT_COND(desc->cond, desc->mutex);
   1438     }
   1439     RF_UNLOCK_MUTEX(desc->mutex);
   1440     rf_FreeRaidAccDesc(desc);
   1441   }
   1442 #endif /* !KERNEL */
   1443 
   1444   return(0);
   1445 }
   1446 
   1447 /* force the array into reconfigured mode without doing reconstruction */
   1448 int rf_SetReconfiguredMode(raidPtr, row, col)
   1449   RF_Raid_t  *raidPtr;
   1450   int         row;
   1451   int         col;
   1452 {
   1453   if (!(raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
   1454     printf("Can't set reconfigured mode in dedicated-spare array\n");
   1455     RF_PANIC();
   1456   }
   1457   RF_LOCK_MUTEX(raidPtr->mutex);
   1458   raidPtr->numFailures++;
   1459   raidPtr->Disks[row][col].status = rf_ds_dist_spared;
   1460   raidPtr->status[row] = rf_rs_reconfigured;
   1461   /* install spare table only if declustering + distributed sparing architecture. */
   1462   if ( raidPtr->Layout.map->flags & RF_BD_DECLUSTERED )
   1463      rf_InstallSpareTable(raidPtr, row, col);
   1464   RF_UNLOCK_MUTEX(raidPtr->mutex);
   1465   return(0);
   1466 }
   1467 
   1468 extern int fail_row, fail_col, fail_time;
   1469 extern int delayed_recon;
   1470 
   1471 int rf_FailDisk(
   1472   RF_Raid_t  *raidPtr,
   1473   int         frow,
   1474   int         fcol,
   1475   int         initRecon)
   1476 {
   1477   int tid;
   1478 
   1479   rf_get_threadid(tid);
   1480   printf("[%d] Failing disk r%d c%d\n",tid,frow,fcol);
   1481   RF_LOCK_MUTEX(raidPtr->mutex);
   1482   raidPtr->numFailures++;
   1483   raidPtr->Disks[frow][fcol].status = rf_ds_failed;
   1484   raidPtr->status[frow] = rf_rs_degraded;
   1485   RF_UNLOCK_MUTEX(raidPtr->mutex);
   1486 #ifdef SIMULATE
   1487 #if RF_DEMO > 0
   1488   if (rf_demoMode) {
   1489     rf_demo_update_mode (RF_DEMO_DEGRADED);
   1490     fail_col = fcol; fail_row = frow;
   1491     fail_time = rf_CurTime(); /* XXX */
   1492     if (initRecon)
   1493       delayed_recon = RF_TRUE;
   1494   }
   1495   else {
   1496     if (initRecon)
   1497       rf_ReconstructFailedDisk(raidPtr, frow, fcol);
   1498   }
   1499 #else /* RF_DEMO > 0 */
   1500   if (initRecon)
   1501     rf_ReconstructFailedDisk(raidPtr, frow, fcol);
   1502 #endif /* RF_DEMO > 0 */
   1503 #else /* SIMULATE */
   1504   if (initRecon)
   1505     rf_ReconstructFailedDisk(raidPtr, frow, fcol);
   1506 #endif /* SIMULATE */
   1507   return(0);
   1508 }
   1509 
#ifdef SIMULATE
extern RF_Owner_t recon_owner;

/*
 * Simulator only: queue an event at the current simulated time that calls
 * rf_ContinueReconstructFailedDisk with reconDesc, on behalf of
 * recon_owner.
 */
void rf_ScheduleContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc)
{
  rf_DDEventRequest(rf_CurTime(),
    rf_ContinueReconstructFailedDisk, (void *) reconDesc,
    recon_owner, -4, -4, reconDesc->raidPtr, NULL);
}
#endif /* SIMULATE */
   1520 
   1521 /* releases a thread that is waiting for the array to become quiesced.
   1522  * access_suspend_mutex should be locked upon calling this
   1523  */
   1524 void rf_SignalQuiescenceLock(raidPtr, reconDesc)
   1525   RF_Raid_t           *raidPtr;
   1526   RF_RaidReconDesc_t  *reconDesc;
   1527 {
   1528   int tid;
   1529 
   1530   if (rf_quiesceDebug) {
   1531     rf_get_threadid(tid);
   1532     printf("[%d] Signalling quiescence lock\n", tid);
   1533   }
   1534   raidPtr->access_suspend_release = 1;
   1535 
   1536   if (raidPtr->waiting_for_quiescence) {
   1537 #ifndef SIMULATE
   1538     SIGNAL_QUIESCENT_COND(raidPtr);
   1539 #else /* !SIMULATE */
   1540     if (reconDesc) {
   1541       rf_ScheduleContinueReconstructFailedDisk(reconDesc);
   1542     }
   1543 #endif /* !SIMULATE */
   1544   }
   1545 }
   1546 
   1547 /* suspends all new requests to the array.  No effect on accesses that are in flight.  */
   1548 int rf_SuspendNewRequestsAndWait(raidPtr)
   1549   RF_Raid_t  *raidPtr;
   1550 {
   1551   if (rf_quiesceDebug)
   1552     printf("Suspending new reqs\n");
   1553 
   1554   RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
   1555   raidPtr->accesses_suspended++;
   1556   raidPtr->waiting_for_quiescence = (raidPtr->accs_in_flight == 0) ? 0 : 1;
   1557 
   1558 #ifndef SIMULATE
   1559   if (raidPtr->waiting_for_quiescence) {
   1560     raidPtr->access_suspend_release=0;
   1561     while (!raidPtr->access_suspend_release) {
   1562 	    printf("Suspending: Waiting for Quiesence\n");
   1563       WAIT_FOR_QUIESCENCE(raidPtr);
   1564       raidPtr->waiting_for_quiescence = 0;
   1565     }
   1566   }
   1567   printf("Quiesence reached..\n");
   1568 #endif /* !SIMULATE */
   1569 
   1570   RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
   1571   return (raidPtr->waiting_for_quiescence);
   1572 }
   1573 
   1574 /* wake up everyone waiting for quiescence to be released */
   1575 void rf_ResumeNewRequests(raidPtr)
   1576   RF_Raid_t  *raidPtr;
   1577 {
   1578   RF_CallbackDesc_t *t, *cb;
   1579 
   1580   if (rf_quiesceDebug)
   1581     printf("Resuming new reqs\n");
   1582 
   1583   RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
   1584   raidPtr->accesses_suspended--;
   1585   if (raidPtr->accesses_suspended == 0)
   1586     cb = raidPtr->quiesce_wait_list;
   1587   else
   1588     cb = NULL;
   1589   raidPtr->quiesce_wait_list = NULL;
   1590   RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
   1591 
   1592   while (cb) {
   1593     t = cb;
   1594     cb = cb->next;
   1595     (t->callbackFunc)(t->callbackArg);
   1596     rf_FreeCallbackDesc(t);
   1597   }
   1598 }
   1599 
   1600 /*****************************************************************************************
   1601  *
   1602  * debug routines
   1603  *
   1604  ****************************************************************************************/
   1605 
   1606 static void set_debug_option(name, val)
   1607   char  *name;
   1608   long   val;
   1609 {
   1610   RF_DebugName_t *p;
   1611 
   1612   for (p = rf_debugNames; p->name; p++) {
   1613     if (!strcmp(p->name, name)) {
   1614       *(p->ptr) = val;
   1615       printf("[Set debug variable %s to %ld]\n",name,val);
   1616       return;
   1617     }
   1618   }
   1619   RF_ERRORMSG1("Unknown debug string \"%s\"\n",name);
   1620 }
   1621 
   1622 
   1623 /* would like to use sscanf here, but apparently not available in kernel */
   1624 /*ARGSUSED*/
   1625 static void rf_ConfigureDebug(cfgPtr)
   1626   RF_Config_t  *cfgPtr;
   1627 {
   1628   char *val_p, *name_p, *white_p;
   1629   long val;
   1630   int i;
   1631 
   1632   rf_ResetDebugOptions();
   1633   for (i=0; cfgPtr->debugVars[i][0] && i < RF_MAXDBGV; i++) {
   1634     name_p  = rf_find_non_white(&cfgPtr->debugVars[i][0]);
   1635     white_p = rf_find_white(name_p);                                   /* skip to start of 2nd word */
   1636     val_p   = rf_find_non_white(white_p);
   1637     if (*val_p == '0' && *(val_p+1) == 'x') val = rf_htoi(val_p+2);
   1638     else val = rf_atoi(val_p);
   1639     *white_p = '\0';
   1640     set_debug_option(name_p, val);
   1641   }
   1642 }
   1643 
   1644 /* performance monitoring stuff */
   1645 
/*
 * Convert a timeval-like object (members tv_sec, tv_usec) to microseconds
 * as a long.  The argument is parenthesized so that any lvalue expression
 * (e.g. a dereferenced pointer) can be passed.
 */
#define TIMEVAL_TO_US(t) ((((long) (t).tv_sec) * 1000000L) + (long) (t).tv_usec)
   1647 
   1648 #if !defined(KERNEL) && !defined(SIMULATE)
   1649 
   1650 /*
   1651  * Throughput stats currently only used in user-level RAIDframe
   1652  */
   1653 
   1654 static int rf_InitThroughputStats(
   1655   RF_ShutdownList_t  **listp,
   1656   RF_Raid_t           *raidPtr,
   1657   RF_Config_t         *cfgPtr)
   1658 {
   1659   int rc;
   1660 
   1661   /* these used by user-level raidframe only */
   1662   rc = rf_create_managed_mutex(listp, &raidPtr->throughputstats.mutex);
   1663   if (rc) {
   1664     RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
   1665       __LINE__, rc);
   1666     return(rc);
   1667   }
   1668   raidPtr->throughputstats.sum_io_us = 0;
   1669   raidPtr->throughputstats.num_ios = 0;
   1670   raidPtr->throughputstats.num_out_ios = 0;
   1671   return(0);
   1672 }
   1673 
   1674 void rf_StartThroughputStats(RF_Raid_t *raidPtr)
   1675 {
   1676   RF_LOCK_MUTEX(raidPtr->throughputstats.mutex);
   1677   raidPtr->throughputstats.num_ios++;
   1678   raidPtr->throughputstats.num_out_ios++;
   1679   if (raidPtr->throughputstats.num_out_ios == 1)
   1680     RF_GETTIME(raidPtr->throughputstats.start);
   1681   RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex);
   1682 }
   1683 
   1684 static void rf_StopThroughputStats(RF_Raid_t *raidPtr)
   1685 {
   1686   struct timeval diff;
   1687 
   1688   RF_LOCK_MUTEX(raidPtr->throughputstats.mutex);
   1689   raidPtr->throughputstats.num_out_ios--;
   1690   if (raidPtr->throughputstats.num_out_ios == 0) {
   1691     RF_GETTIME(raidPtr->throughputstats.stop);
   1692     RF_TIMEVAL_DIFF(&raidPtr->throughputstats.start, &raidPtr->throughputstats.stop, &diff);
   1693     raidPtr->throughputstats.sum_io_us += TIMEVAL_TO_US(diff);
   1694   }
   1695   RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex);
   1696 }
   1697 
   1698 static void rf_PrintThroughputStats(RF_Raid_t *raidPtr)
   1699 {
   1700   RF_ASSERT(raidPtr->throughputstats.num_out_ios == 0);
   1701   if ( raidPtr->throughputstats.sum_io_us != 0 ) {
   1702      printf("[Througphut: %8.2f IOs/second]\n", raidPtr->throughputstats.num_ios
   1703        / (raidPtr->throughputstats.sum_io_us / 1000000.0));
   1704   }
   1705 }
   1706 
   1707 #endif /* !KERNEL && !SIMULATE */
   1708 
   1709 void rf_StartUserStats(RF_Raid_t *raidPtr)
   1710 {
   1711   RF_GETTIME(raidPtr->userstats.start);
   1712   raidPtr->userstats.sum_io_us = 0;
   1713   raidPtr->userstats.num_ios = 0;
   1714   raidPtr->userstats.num_sect_moved = 0;
   1715 }
   1716 
   1717 void rf_StopUserStats(RF_Raid_t *raidPtr)
   1718 {
   1719   RF_GETTIME(raidPtr->userstats.stop);
   1720 }
   1721 
   1722 void rf_UpdateUserStats(raidPtr, rt, numsect)
   1723   RF_Raid_t  *raidPtr;
   1724   int         rt;       /* resp time in us */
   1725   int         numsect;  /* number of sectors for this access */
   1726 {
   1727   raidPtr->userstats.sum_io_us += rt;
   1728   raidPtr->userstats.num_ios++;
   1729   raidPtr->userstats.num_sect_moved += numsect;
   1730 }
   1731 
   1732 void rf_PrintUserStats(RF_Raid_t *raidPtr)
   1733 {
   1734   long elapsed_us, mbs, mbs_frac;
   1735   struct timeval diff;
   1736 
   1737   RF_TIMEVAL_DIFF(&raidPtr->userstats.start, &raidPtr->userstats.stop, &diff);
   1738   elapsed_us = TIMEVAL_TO_US(diff);
   1739 
   1740   /* 2000 sectors per megabyte, 10000000 microseconds per second */
   1741   if (elapsed_us)
   1742     mbs = (raidPtr->userstats.num_sect_moved / 2000) / (elapsed_us / 1000000);
   1743   else
   1744     mbs = 0;
   1745 
   1746   /* this computes only the first digit of the fractional mb/s moved */
   1747   if (elapsed_us) {
   1748     mbs_frac = ((raidPtr->userstats.num_sect_moved / 200) / (elapsed_us / 1000000))
   1749       - (mbs * 10);
   1750   }
   1751   else {
   1752     mbs_frac = 0;
   1753   }
   1754 
   1755   printf("Number of I/Os:             %ld\n",raidPtr->userstats.num_ios);
   1756   printf("Elapsed time (us):          %ld\n",elapsed_us);
   1757   printf("User I/Os per second:       %ld\n",RF_DB0_CHECK(raidPtr->userstats.num_ios, (elapsed_us/1000000)));
   1758   printf("Average user response time: %ld us\n",RF_DB0_CHECK(raidPtr->userstats.sum_io_us, raidPtr->userstats.num_ios));
   1759   printf("Total sectors moved:        %ld\n",raidPtr->userstats.num_sect_moved);
   1760   printf("Average access size (sect): %ld\n",RF_DB0_CHECK(raidPtr->userstats.num_sect_moved, raidPtr->userstats.num_ios));
   1761   printf("Achieved data rate:         %ld.%ld MB/sec\n",mbs,mbs_frac);
   1762 }
   1763