Home | History | Annotate | Line # | Download | only in server
nfs_nfsdcache.c revision 1.1
      1  1.1  dholland /*	$NetBSD: nfs_nfsdcache.c,v 1.1 2013/09/30 07:19:57 dholland Exp $	*/
      2  1.1  dholland /*-
      3  1.1  dholland  * Copyright (c) 1989, 1993
      4  1.1  dholland  *	The Regents of the University of California.  All rights reserved.
      5  1.1  dholland  *
      6  1.1  dholland  * This code is derived from software contributed to Berkeley by
      7  1.1  dholland  * Rick Macklem at The University of Guelph.
      8  1.1  dholland  *
      9  1.1  dholland  * Redistribution and use in source and binary forms, with or without
     10  1.1  dholland  * modification, are permitted provided that the following conditions
     11  1.1  dholland  * are met:
     12  1.1  dholland  * 1. Redistributions of source code must retain the above copyright
     13  1.1  dholland  *    notice, this list of conditions and the following disclaimer.
     14  1.1  dholland  * 2. Redistributions in binary form must reproduce the above copyright
     15  1.1  dholland  *    notice, this list of conditions and the following disclaimer in the
     16  1.1  dholland  *    documentation and/or other materials provided with the distribution.
     17  1.1  dholland  * 4. Neither the name of the University nor the names of its contributors
     18  1.1  dholland  *    may be used to endorse or promote products derived from this software
     19  1.1  dholland  *    without specific prior written permission.
     20  1.1  dholland  *
     21  1.1  dholland  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     22  1.1  dholland  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     23  1.1  dholland  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     24  1.1  dholland  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     25  1.1  dholland  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     26  1.1  dholland  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     27  1.1  dholland  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     28  1.1  dholland  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     29  1.1  dholland  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     30  1.1  dholland  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     31  1.1  dholland  * SUCH DAMAGE.
     32  1.1  dholland  *
     33  1.1  dholland  */
     34  1.1  dholland 
     35  1.1  dholland #include <sys/cdefs.h>
     36  1.1  dholland /* __FBSDID("FreeBSD: head/sys/fs/nfsserver/nfs_nfsdcache.c 254337 2013-08-14 21:11:26Z rmacklem "); */
     37  1.1  dholland __RCSID("$NetBSD: nfs_nfsdcache.c,v 1.1 2013/09/30 07:19:57 dholland Exp $");
     38  1.1  dholland 
     39  1.1  dholland /*
     40  1.1  dholland  * Here is the basic algorithm:
     41  1.1  dholland  * First, some design criteria I used:
     42  1.1  dholland  * - I think a false hit is more serious than a false miss
     43  1.1  dholland  * - A false hit for an RPC that has Op(s) that order via seqid# must be
     44  1.1  dholland  *   avoided at all cost
     45  1.1  dholland  * - A valid hit will probably happen a long time after the original reply
     46  1.1  dholland  *   and the TCP socket that the original request was received on will no
     47  1.1  dholland  *   longer be active
     48  1.1  dholland  *   (The long time delay implies to me that LRU is not appropriate.)
     49  1.1  dholland  * - The mechanism will satisfy the requirements of ordering Ops with seqid#s
     50  1.1  dholland  *   in them as well as minimizing the risk of redoing retried non-idempotent
     51  1.1  dholland  *   Ops.
     52  1.1  dholland  * Because it is biased towards avoiding false hits, multiple entries with
     53  1.1  dholland  * the same xid are to be expected, especially for the case of the entry
     54  1.1  dholland  * in the cache being related to a seqid# sequenced Op.
     55  1.1  dholland  *
     56  1.1  dholland  * The basic algorithm I'm about to code up:
     57  1.1  dholland  * - Null RPCs bypass the cache and are just done
     58  1.1  dholland  * For TCP
     59  1.1  dholland  * 	- key on <xid, NFS version> (as noted above, there can be several
     60  1.1  dholland  * 				     entries with the same key)
     61  1.1  dholland  * 	When a request arrives:
     62  1.1  dholland  * 		For all that match key
     63  1.1  dholland  * 		- if RPC# != OR request_size !=
     64  1.1  dholland  * 			- not a match with this one
     65  1.1  dholland  * 		- if NFSv4 and received on same TCP socket OR
     66  1.1  dholland  *			received on a TCP connection created before the
     67  1.1  dholland  *			entry was cached
     68  1.1  dholland  * 			- not a match with this one
     69  1.1  dholland  * 			(V2,3 clients might retry on same TCP socket)
     70  1.1  dholland  * 		- calculate checksum on first N bytes of NFS XDR
     71  1.1  dholland  * 		- if checksum !=
     72  1.1  dholland  * 			- not a match for this one
     73  1.1  dholland  * 		If any of the remaining ones that match has a
     74  1.1  dholland  * 			seqid_refcnt > 0
     75  1.1  dholland  * 			- not a match (go do RPC, using new cache entry)
     76  1.1  dholland  * 		If one match left
     77  1.1  dholland  * 			- a hit (reply from cache)
     78  1.1  dholland  * 		else
     79  1.1  dholland  * 			- miss (go do RPC, using new cache entry)
     80  1.1  dholland  *
     81  1.1  dholland  * 	During processing of NFSv4 request:
     82  1.1  dholland  * 		- set a flag when a non-idempotent Op is processed
     83  1.1  dholland  * 		- when an Op that uses a seqid# (Open,...) is processed
     84  1.1  dholland  * 			- if same seqid# as referenced entry in cache
     85  1.1  dholland  * 				- free new cache entry
     86  1.1  dholland  * 				- reply from referenced cache entry
     87  1.1  dholland  * 			  else if next seqid# in order
     88  1.1  dholland  * 				- free referenced cache entry
     89  1.1  dholland  * 				- increment seqid_refcnt on new cache entry
     90  1.1  dholland  * 				- set pointer from Openowner/Lockowner to
     91  1.1  dholland  * 					new cache entry (aka reference it)
     92  1.1  dholland  * 			  else if first seqid# in sequence
     93  1.1  dholland  * 				- increment seqid_refcnt on new cache entry
     94  1.1  dholland  * 				- set pointer from Openowner/Lockowner to
     95  1.1  dholland  * 					new cache entry (aka reference it)
     96  1.1  dholland  *
     97  1.1  dholland  * 	At end of RPC processing:
     98  1.1  dholland  * 		- if seqid_refcnt > 0 OR flagged non-idempotent on new
     99  1.1  dholland  * 			cache entry
    100  1.1  dholland  * 			- save reply in cache entry
    101  1.1  dholland  * 			- calculate checksum on first N bytes of NFS XDR
    102  1.1  dholland  * 				request
    103  1.1  dholland  * 			- note op and length of XDR request (in bytes)
    104  1.1  dholland  * 			- timestamp it
    105  1.1  dholland  * 		  else
    106  1.1  dholland  * 			- free new cache entry
    107  1.1  dholland  * 		- Send reply (noting info for socket activity check, below)
    108  1.1  dholland  *
    109  1.1  dholland  * 	For cache entries saved above:
    110  1.1  dholland  * 		- if saved since seqid_refcnt was > 0
    111  1.1  dholland  * 			- free when seqid_refcnt decrements to 0
    112  1.1  dholland  * 			  (when next one in sequence is processed above, or
    113  1.1  dholland  * 			   when Openowner/Lockowner is discarded)
    114  1.1  dholland  * 		  else { non-idempotent Op(s) }
    115  1.1  dholland  * 			- free when
    116  1.1  dholland  * 				- some further activity observed on same
    117  1.1  dholland  * 					socket
    118  1.1  dholland  * 				  (I'm not yet sure how I'm going to do
    119  1.1  dholland  * 				   this. Maybe look at the TCP connection
    120  1.1  dholland  * 				   to see if the send_tcp_sequence# is well
    121  1.1  dholland  * 				   past sent reply OR K additional RPCs
    122  1.1  dholland  * 				   replied on same socket OR?)
    123  1.1  dholland  * 			  OR
    124  1.1  dholland  * 				- when very old (hours, days, weeks?)
    125  1.1  dholland  *
    126  1.1  dholland  * For UDP (v2, 3 only), pretty much the old way:
    127  1.1  dholland  * - key on <xid, NFS version, RPC#, Client host ip#>
    128  1.1  dholland  *   (at most one entry for each key)
    129  1.1  dholland  *
    130  1.1  dholland  * When a Request arrives:
    131  1.1  dholland  * - if a match with entry via key
    132  1.1  dholland  * 	- if RPC marked In_progress
    133  1.1  dholland  * 		- discard request (don't send reply)
    134  1.1  dholland  * 	  else
    135  1.1  dholland  * 		- reply from cache
    136  1.1  dholland  * 		- timestamp cache entry
    137  1.1  dholland  *   else
    138  1.1  dholland  * 	- add entry to cache, marked In_progress
    139  1.1  dholland  * 	- do RPC
    140  1.1  dholland  * 	- when RPC done
    141  1.1  dholland  * 		- if RPC# non-idempotent
    142  1.1  dholland  * 			- mark entry Done (not In_progress)
    143  1.1  dholland  * 			- save reply
    144  1.1  dholland  * 			- timestamp cache entry
    145  1.1  dholland  * 		  else
    146  1.1  dholland  * 			- free cache entry
    147  1.1  dholland  * 		- send reply
    148  1.1  dholland  *
    149  1.1  dholland  * Later, entries with saved replies are free'd a short time (few minutes)
    150  1.1  dholland  * after reply sent (timestamp).
    151  1.1  dholland  * Reference: Chet Juszczak, "Improving the Performance and Correctness
    152  1.1  dholland  *		of an NFS Server", in Proc. Winter 1989 USENIX Conference,
    153  1.1  dholland  *		pages 53-63. San Diego, February 1989.
    154  1.1  dholland  *	 for the UDP case.
    155  1.1  dholland  * nfsrc_floodlevel is set to the allowable upper limit for saved replies
    156  1.1  dholland  *	for TCP. For V3, a reply won't be saved when the flood level is
    157  1.1  dholland  *	hit. For V4, the non-idempotent Op will return NFSERR_RESOURCE in
    158  1.1  dholland  *	that case. This level should be set high enough that this almost
    159  1.1  dholland  *	never happens.
    160  1.1  dholland  */
    161  1.1  dholland #ifndef APPLEKEXT
    162  1.1  dholland #include <fs/nfs/nfsport.h>
     163  1.1  dholland 
                        /*
                         * State shared with the rest of the NFS server (defined elsewhere):
                         *  - newnfsstats: statistics block; this file updates its srvcache_*
                         *    fields.
                         *  - nfsrc_udpmtx: the one mutex nfsrc_cachemutex() returns for every
                         *    RC_UDP cache entry.
                         *  - nfsrchash_table: hash buckets for non-UDP entries; each bucket
                         *    supplies a list head (.tbl) and its own mutex (.mtx).
                         */
     164  1.1  dholland extern struct nfsstats newnfsstats;
     165  1.1  dholland extern struct mtx nfsrc_udpmtx;
     166  1.1  dholland extern struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
                        /*
                         * nfsrc_floodlevel is the ceiling on saved TCP replies tested by
                         * nfsrvd_updatecache(); nfsrc_tcpsavedreplies is the running count that
                         * function increments when it saves a TCP reply mbuf.
                         */
     167  1.1  dholland int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0;
    168  1.1  dholland #endif	/* !APPLEKEXT */
    169  1.1  dholland 
    170  1.1  dholland SYSCTL_DECL(_vfs_nfsd);
    171  1.1  dholland 
    172  1.1  dholland static u_int	nfsrc_tcphighwater = 0;
    173  1.1  dholland static int
    174  1.1  dholland sysctl_tcphighwater(SYSCTL_HANDLER_ARGS)
    175  1.1  dholland {
    176  1.1  dholland 	int error, newhighwater;
    177  1.1  dholland 
    178  1.1  dholland 	newhighwater = nfsrc_tcphighwater;
    179  1.1  dholland 	error = sysctl_handle_int(oidp, &newhighwater, 0, req);
    180  1.1  dholland 	if (error != 0 || req->newptr == NULL)
    181  1.1  dholland 		return (error);
    182  1.1  dholland 	if (newhighwater < 0)
    183  1.1  dholland 		return (EINVAL);
    184  1.1  dholland 	if (newhighwater >= nfsrc_floodlevel)
    185  1.1  dholland 		nfsrc_floodlevel = newhighwater + newhighwater / 5;
    186  1.1  dholland 	nfsrc_tcphighwater = newhighwater;
    187  1.1  dholland 	return (0);
    188  1.1  dholland }
    189  1.1  dholland SYSCTL_PROC(_vfs_nfsd, OID_AUTO, tcphighwater, CTLTYPE_UINT | CTLFLAG_RW, 0,
    190  1.1  dholland     sizeof(nfsrc_tcphighwater), sysctl_tcphighwater, "IU",
    191  1.1  dholland     "High water mark for TCP cache entries");
     192  1.1  dholland 
                        /*
                         * Tunables exported under vfs.nfsd.  NOTE(review): nfsrc_udphighwater is
                         * not referenced in the part of the file visible here; presumably it is
                         * consulted when the cache is trimmed -- confirm.
                         */
     193  1.1  dholland static u_int	nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER;
     194  1.1  dholland SYSCTL_UINT(_vfs_nfsd, OID_AUTO, udphighwater, CTLFLAG_RW,
     195  1.1  dholland     &nfsrc_udphighwater, 0,
     196  1.1  dholland     "High water mark for UDP cache entries");
                        /* Added to NFSD_MONOSEC to form rc_timestamp for TCP entries. */
     197  1.1  dholland static u_int	nfsrc_tcptimeout = NFSRVCACHE_TCPTIMEOUT;
     198  1.1  dholland SYSCTL_UINT(_vfs_nfsd, OID_AUTO, tcpcachetimeo, CTLFLAG_RW,
     199  1.1  dholland     &nfsrc_tcptimeout, 0,
     200  1.1  dholland     "Timeout for TCP entries in the DRC");
                        /*
                         * When zero, nfsrvd_updatecache() does not save ND_SAVEREPLY replies for
                         * TCP entries (entries with rc_refcnt > 0 are still saved).
                         */
     201  1.1  dholland static u_int nfsrc_tcpnonidempotent = 1;
     202  1.1  dholland SYSCTL_UINT(_vfs_nfsd, OID_AUTO, cachetcp, CTLFLAG_RW,
     203  1.1  dholland     &nfsrc_tcpnonidempotent, 0,
     204  1.1  dholland     "Enable the DRC for NFS over TCP");
     205  1.1  dholland 
                        /*
                         * UDP cache bookkeeping: the entry count (incremented when
                         * nfsrc_getudp() inserts an entry), the LRU queue (entries are moved to
                         * the tail whenever they are hit or updated), and the UDP hash buckets
                         * indexed through NFSRCUDPHASH().
                         */
     206  1.1  dholland static int nfsrc_udpcachesize = 0;
     207  1.1  dholland static TAILQ_HEAD(, nfsrvcache) nfsrvudplru;
     208  1.1  dholland static struct nfsrvhashhead nfsrvudphashtbl[NFSRVCACHE_HASHSIZE];
    209  1.1  dholland 
     210  1.1  dholland /*
     211  1.1  dholland  * and the reverse mapping from generic to Version 2 procedure numbers
                         *
                         * The table is indexed by nd_procnum (see nfsrvd_updatecache()) and has
                         * NFS_V3NPROCS slots.  Slots holding NFSV2PROC_NOOP have no Version 2
                         * counterpart in this table; slots 8 and 11 both map to
                         * NFSV2PROC_CREATE.
                         * NOTE(review): the index order looks like NFSv3 procedure numbering --
                         * confirm against the NFSPROC_* definitions.
     212  1.1  dholland  */
     213  1.1  dholland static int newnfsv2_procid[NFS_V3NPROCS] = {
     214  1.1  dholland 	NFSV2PROC_NULL,		/* 0 */
     215  1.1  dholland 	NFSV2PROC_GETATTR,	/* 1 */
     216  1.1  dholland 	NFSV2PROC_SETATTR,	/* 2 */
     217  1.1  dholland 	NFSV2PROC_LOOKUP,	/* 3 */
     218  1.1  dholland 	NFSV2PROC_NOOP,		/* 4 */
     219  1.1  dholland 	NFSV2PROC_READLINK,	/* 5 */
     220  1.1  dholland 	NFSV2PROC_READ,		/* 6 */
     221  1.1  dholland 	NFSV2PROC_WRITE,	/* 7 */
     222  1.1  dholland 	NFSV2PROC_CREATE,	/* 8 */
     223  1.1  dholland 	NFSV2PROC_MKDIR,	/* 9 */
     224  1.1  dholland 	NFSV2PROC_SYMLINK,	/* 10 */
     225  1.1  dholland 	NFSV2PROC_CREATE,	/* 11 */
     226  1.1  dholland 	NFSV2PROC_REMOVE,	/* 12 */
     227  1.1  dholland 	NFSV2PROC_RMDIR,	/* 13 */
     228  1.1  dholland 	NFSV2PROC_RENAME,	/* 14 */
     229  1.1  dholland 	NFSV2PROC_LINK,		/* 15 */
     230  1.1  dholland 	NFSV2PROC_READDIR,	/* 16 */
     231  1.1  dholland 	NFSV2PROC_NOOP,		/* 17 */
     232  1.1  dholland 	NFSV2PROC_STATFS,	/* 18 */
     233  1.1  dholland 	NFSV2PROC_NOOP,		/* 19 */
     234  1.1  dholland 	NFSV2PROC_NOOP,		/* 20 */
     235  1.1  dholland 	NFSV2PROC_NOOP,		/* 21 */
     236  1.1  dholland };
     237  1.1  dholland 
                        /*
                         * Bucket index for an xid: add the xid shifted right by 24 bits to the
                         * xid itself, then reduce modulo NFSRVCACHE_HASHSIZE.  The macro
                         * evaluates its argument twice, so pass an expression without side
                         * effects.
                         */
     238  1.1  dholland #define	nfsrc_hash(xid)	(((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE)
                        /* List head of the UDP hash bucket for an xid. */
     239  1.1  dholland #define	NFSRCUDPHASH(xid) \
     240  1.1  dholland 	(&nfsrvudphashtbl[nfsrc_hash(xid)])
                        /* List head (.tbl) of the nfsrchash_table bucket for an xid. */
     241  1.1  dholland #define	NFSRCHASH(xid) \
     242  1.1  dholland 	(&nfsrchash_table[nfsrc_hash(xid)].tbl)
     243  1.1  dholland #define	TRUE	1
     244  1.1  dholland #define	FALSE	0
                        /*
                         * NOTE(review): not referenced in the visible part of the file;
                         * presumably the "first N bytes of NFS XDR" that get checksummed, as
                         * described in the design comment at the top -- confirm.
                         */
     245  1.1  dholland #define	NFSRVCACHE_CHECKLEN	100
    246  1.1  dholland 
     247  1.1  dholland /* True iff the rpc reply is an nfs status ONLY! */
                        /*
                         * Indexed by the Version 2 procedure number obtained from
                         * newnfsv2_procid[].  For a TRUE slot nfsrvd_updatecache() caches only
                         * the status word (RC_REPSTATUS) instead of copying the reply mbuf
                         * chain.
                         * NOTE(review): under RFC 1094 numbering the TRUE slots would be
                         * REMOVE, RENAME, LINK, SYMLINK and RMDIR -- confirm against the
                         * NFSV2PROC_* definitions.
                         */
     248  1.1  dholland static int nfsv2_repstat[NFS_V3NPROCS] = {
     249  1.1  dholland 	FALSE,	/* 0 */
     250  1.1  dholland 	FALSE,	/* 1 */
     251  1.1  dholland 	FALSE,	/* 2 */
     252  1.1  dholland 	FALSE,	/* 3 */
     253  1.1  dholland 	FALSE,	/* 4 */
     254  1.1  dholland 	FALSE,	/* 5 */
     255  1.1  dholland 	FALSE,	/* 6 */
     256  1.1  dholland 	FALSE,	/* 7 */
     257  1.1  dholland 	FALSE,	/* 8 */
     258  1.1  dholland 	FALSE,	/* 9 */
     259  1.1  dholland 	TRUE,	/* 10 */
     260  1.1  dholland 	TRUE,	/* 11 */
     261  1.1  dholland 	TRUE,	/* 12 */
     262  1.1  dholland 	TRUE,	/* 13 */
     263  1.1  dholland 	FALSE,	/* 14 */
     264  1.1  dholland 	TRUE,	/* 15 */
     265  1.1  dholland 	FALSE,	/* 16 */
     266  1.1  dholland 	FALSE,	/* 17 */
     267  1.1  dholland 	FALSE,	/* 18 */
     268  1.1  dholland 	FALSE,	/* 19 */
     269  1.1  dholland 	FALSE,	/* 20 */
     270  1.1  dholland 	FALSE,	/* 21 */
     271  1.1  dholland };
    272  1.1  dholland 
     273  1.1  dholland /*
     274  1.1  dholland  * Will NFS want to work over IPv6 someday?
                         *
                         * Address family of the client address stored in a cache entry:
                         * AF_INET6 when RC_INETIPV6 is set in rc_flag, AF_INET otherwise.
                         * nfsrc_getudp() sets RC_INETIPV6 when it stores an IPv6 address.
     275  1.1  dholland  */
     276  1.1  dholland #define	NETFAMILY(rp) \
     277  1.1  dholland 		(((rp)->rc_flag & RC_INETIPV6) ? AF_INET6 : AF_INET)
    278  1.1  dholland 
     279  1.1  dholland /* local functions */
                        /*
                         * nfsrc_getudp() and nfsrc_gettcp() do the per-transport lookup for
                         * nfsrvd_getcache().  The remaining helpers are defined outside the
                         * part of the file visible here; callers below use nfsrc_lock() and
                         * nfsrc_unlock() around work on an entry, nfsrc_freecache() to discard
                         * an entry, and nfsrc_trimcache() after each get/update.
                         */
     280  1.1  dholland static int nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
     281  1.1  dholland static int nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
     282  1.1  dholland static void nfsrc_lock(struct nfsrvcache *rp);
     283  1.1  dholland static void nfsrc_unlock(struct nfsrvcache *rp);
     284  1.1  dholland static void nfsrc_wanted(struct nfsrvcache *rp);
     285  1.1  dholland static void nfsrc_freecache(struct nfsrvcache *rp);
     286  1.1  dholland static void nfsrc_trimcache(u_int64_t, struct socket *);
     287  1.1  dholland static int nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t,
     288  1.1  dholland     struct socket *);
     289  1.1  dholland static int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum);
     290  1.1  dholland static void nfsrc_marksametcpconn(u_int64_t);
    291  1.1  dholland 
     292  1.1  dholland /*
     293  1.1  dholland  * Return the correct mutex for this cache entry.
                         *
                         * Entries flagged RC_UDP all share nfsrc_udpmtx; any other entry uses
                         * the mutex of the nfsrchash_table bucket selected by
                         * nfsrc_hash(rc_xid).  Only rc_flag and rc_xid are read, so the entry
                         * need not be on a hash list yet (nfsrc_getudp() calls this on newrp
                         * before inserting it).
     294  1.1  dholland  */
     295  1.1  dholland static __inline struct mtx *
     296  1.1  dholland nfsrc_cachemutex(struct nfsrvcache *rp)
     297  1.1  dholland {
     298  1.1  dholland 
     299  1.1  dholland 	if ((rp->rc_flag & RC_UDP) != 0)
     300  1.1  dholland 		return (&nfsrc_udpmtx);
     301  1.1  dholland 	return (&nfsrchash_table[nfsrc_hash(rp->rc_xid)].mtx);
     302  1.1  dholland }
    303  1.1  dholland 
     304  1.1  dholland /*
     305  1.1  dholland  * Initialize the server request cache list
                         *
                         * Initializes every UDP and TCP hash bucket list and the UDP LRU queue,
                         * and zeroes the saved-reply and cache-size counters.  Only the first
                         * call does any work; later calls return immediately.
                         * NOTE(review): the "inited" guard is tested and set without a lock, so
                         * this presumably relies on being called from a single thread first --
                         * confirm against the caller.
     306  1.1  dholland  */
     307  1.1  dholland APPLESTATIC void
     308  1.1  dholland nfsrvd_initcache(void)
     309  1.1  dholland {
     310  1.1  dholland 	int i;
     311  1.1  dholland 	static int inited = 0;
     312  1.1  dholland 
     313  1.1  dholland 	if (inited)
     314  1.1  dholland 		return;
     315  1.1  dholland 	inited = 1;
     316  1.1  dholland 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
     317  1.1  dholland 		LIST_INIT(&nfsrvudphashtbl[i]);
     318  1.1  dholland 		LIST_INIT(&nfsrchash_table[i].tbl);
     319  1.1  dholland 	}
     320  1.1  dholland 	TAILQ_INIT(&nfsrvudplru);
     321  1.1  dholland 	nfsrc_tcpsavedreplies = 0;
     322  1.1  dholland 	nfsrc_udpcachesize = 0;
     323  1.1  dholland 	newnfsstats.srvcache_tcppeak = 0;
     324  1.1  dholland 	newnfsstats.srvcache_size = 0;
     325  1.1  dholland }
    326  1.1  dholland 
     327  1.1  dholland /*
     328  1.1  dholland  * Get a cache entry for this request. Basically just malloc a new one
     329  1.1  dholland  * and then call nfsrc_getudp() or nfsrc_gettcp() to do the rest.
     330  1.1  dholland  * Call nfsrc_trimcache() to clean up the cache before returning.
                         *
                         * nd supplies the xid, procedure number, socket reference and TCP
                         * connection time copied into the new entry; so is only passed on to
                         * nfsrc_trimcache().  The Null procedure must never get here (panic).
                         *
                         * Returns the value produced by nfsrc_getudp() or nfsrc_gettcp();
                         * nfsrc_getudp() yields RC_DOIT, RC_REPLY or RC_DROPIT.
     331  1.1  dholland  */
     332  1.1  dholland APPLESTATIC int
     333  1.1  dholland nfsrvd_getcache(struct nfsrv_descript *nd, struct socket *so)
     334  1.1  dholland {
     335  1.1  dholland 	struct nfsrvcache *newrp;
     336  1.1  dholland 	int ret;
     337  1.1  dholland 
     338  1.1  dholland 	if (nd->nd_procnum == NFSPROC_NULL)
     339  1.1  dholland 		panic("nfsd cache null");
     340  1.1  dholland 	MALLOC(newrp, struct nfsrvcache *, sizeof (struct nfsrvcache),
     341  1.1  dholland 	    M_NFSRVCACHE, M_WAITOK);
     342  1.1  dholland 	NFSBZERO((caddr_t)newrp, sizeof (struct nfsrvcache));
                        	/* Record the NFS version; the lookups compare it via RC_NFSVERS. */
     343  1.1  dholland 	if (nd->nd_flag & ND_NFSV4)
     344  1.1  dholland 		newrp->rc_flag = RC_NFSV4;
     345  1.1  dholland 	else if (nd->nd_flag & ND_NFSV3)
     346  1.1  dholland 		newrp->rc_flag = RC_NFSV3;
     347  1.1  dholland 	else
     348  1.1  dholland 		newrp->rc_flag = RC_NFSV2;
     349  1.1  dholland 	newrp->rc_xid = nd->nd_retxid;
     350  1.1  dholland 	newrp->rc_proc = nd->nd_procnum;
     351  1.1  dholland 	newrp->rc_sockref = nd->nd_sockref;
     352  1.1  dholland 	newrp->rc_cachetime = nd->nd_tcpconntime;
     353  1.1  dholland 	if (nd->nd_flag & ND_SAMETCPCONN)
     354  1.1  dholland 		newrp->rc_flag |= RC_SAMETCPCONN;
                        	/*
                        	 * A non-NULL nd_nam2 selects the UDP path.  RC_UDP has to be set
                        	 * before the call because nfsrc_getudp() picks its mutex from
                        	 * rc_flag through nfsrc_cachemutex().
                        	 */
     355  1.1  dholland 	if (nd->nd_nam2 != NULL) {
     356  1.1  dholland 		newrp->rc_flag |= RC_UDP;
     357  1.1  dholland 		ret = nfsrc_getudp(nd, newrp);
     358  1.1  dholland 	} else {
     359  1.1  dholland 		ret = nfsrc_gettcp(nd, newrp);
     360  1.1  dholland 	}
     361  1.1  dholland 	nfsrc_trimcache(nd->nd_sockref, so);
     362  1.1  dholland 	NFSEXITCODE2(0, nd);
     363  1.1  dholland 	return (ret);
     364  1.1  dholland }
    365  1.1  dholland 
     366  1.1  dholland /*
     367  1.1  dholland  * For UDP (v2, v3):
     368  1.1  dholland  * - key on <xid, NFS version, RPC#, Client host ip#>
     369  1.1  dholland  *   (at most one entry for each key)
                         *
                         * Searches the UDP hash bucket for an entry matching newrp's xid,
                         * procedure, NFS version and the client address in nd->nd_nam.
                         *
                         * On a hit newrp is freed and the result depends on the entry's state:
                         *  - RC_INPROG: returns RC_DROPIT.
                         *  - RC_REPSTATUS: builds a reply header with nfsrvd_rephead(), stores
                         *    the cached status through nd->nd_errp, returns RC_REPLY.
                         *  - RC_REPMBUF: copies the cached reply into nd->nd_mreq, returns
                         *    RC_REPLY.
                         * On a miss newrp is marked RC_INPROG, given the client address,
                         * linked into the hash bucket and LRU queue, stored in nd->nd_rp, and
                         * RC_DOIT is returned.
     370  1.1  dholland  */
     371  1.1  dholland static int
     372  1.1  dholland nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
     373  1.1  dholland {
     374  1.1  dholland 	struct nfsrvcache *rp;
     375  1.1  dholland 	struct sockaddr_in *saddr;
     376  1.1  dholland 	struct sockaddr_in6 *saddr6;
     377  1.1  dholland 	struct nfsrvhashhead *hp;
     378  1.1  dholland 	int ret = 0;
     379  1.1  dholland 	struct mtx *mutex;
     380  1.1  dholland 
     381  1.1  dholland 	mutex = nfsrc_cachemutex(newrp);
     382  1.1  dholland 	hp = NFSRCUDPHASH(newrp->rc_xid);
     383  1.1  dholland loop:
     384  1.1  dholland 	mtx_lock(mutex);
     385  1.1  dholland 	LIST_FOREACH(rp, hp, rc_hash) {
     386  1.1  dholland 	    if (newrp->rc_xid == rp->rc_xid &&
     387  1.1  dholland 		newrp->rc_proc == rp->rc_proc &&
     388  1.1  dholland 		(newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
     389  1.1  dholland 		nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
                        			/*
                        			 * Entry is busy: ask for a wakeup and sleep for up
                        			 * to 10 * hz ticks.  PDROP means the mutex is not
                        			 * held when mtx_sleep() returns, so the scan is
                        			 * restarted from "loop", which takes it again.
                        			 */
     390  1.1  dholland 			if ((rp->rc_flag & RC_LOCKED) != 0) {
     391  1.1  dholland 				rp->rc_flag |= RC_WANTED;
     392  1.1  dholland 				(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
     393  1.1  dholland 				    "nfsrc", 10 * hz);
     394  1.1  dholland 				goto loop;
     395  1.1  dholland 			}
     396  1.1  dholland 			if (rp->rc_flag == 0)
     397  1.1  dholland 				panic("nfs udp cache0");
                        			/* Claim the entry and move it to the LRU tail. */
     398  1.1  dholland 			rp->rc_flag |= RC_LOCKED;
     399  1.1  dholland 			TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
     400  1.1  dholland 			TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
                        			/*
                        			 * Each branch drops the mutex before touching the
                        			 * reply; RC_LOCKED stays set on the entry until
                        			 * nfsrc_unlock() below.
                        			 */
     401  1.1  dholland 			if (rp->rc_flag & RC_INPROG) {
     402  1.1  dholland 				newnfsstats.srvcache_inproghits++;
     403  1.1  dholland 				mtx_unlock(mutex);
     404  1.1  dholland 				ret = RC_DROPIT;
     405  1.1  dholland 			} else if (rp->rc_flag & RC_REPSTATUS) {
     406  1.1  dholland 				/*
     407  1.1  dholland 				 * V2 only.
     408  1.1  dholland 				 */
     409  1.1  dholland 				newnfsstats.srvcache_nonidemdonehits++;
     410  1.1  dholland 				mtx_unlock(mutex);
     411  1.1  dholland 				nfsrvd_rephead(nd);
     412  1.1  dholland 				*(nd->nd_errp) = rp->rc_status;
     413  1.1  dholland 				ret = RC_REPLY;
     414  1.1  dholland 				rp->rc_timestamp = NFSD_MONOSEC +
     415  1.1  dholland 					NFSRVCACHE_UDPTIMEOUT;
     416  1.1  dholland 			} else if (rp->rc_flag & RC_REPMBUF) {
     417  1.1  dholland 				newnfsstats.srvcache_nonidemdonehits++;
     418  1.1  dholland 				mtx_unlock(mutex);
     419  1.1  dholland 				nd->nd_mreq = m_copym(rp->rc_reply, 0,
     420  1.1  dholland 					M_COPYALL, M_WAITOK);
     421  1.1  dholland 				ret = RC_REPLY;
     422  1.1  dholland 				rp->rc_timestamp = NFSD_MONOSEC +
     423  1.1  dholland 					NFSRVCACHE_UDPTIMEOUT;
     424  1.1  dholland 			} else {
     425  1.1  dholland 				panic("nfs udp cache1");
     426  1.1  dholland 			}
     427  1.1  dholland 			nfsrc_unlock(rp);
                        			/* The existing entry is used, so drop the new one. */
     428  1.1  dholland 			free((caddr_t)newrp, M_NFSRVCACHE);
     429  1.1  dholland 			goto out;
     430  1.1  dholland 		}
     431  1.1  dholland 	}
                        	/* Miss (mutex still held): newrp becomes the in-progress entry. */
     432  1.1  dholland 	newnfsstats.srvcache_misses++;
     433  1.1  dholland 	atomic_add_int(&newnfsstats.srvcache_size, 1);
     434  1.1  dholland 	nfsrc_udpcachesize++;
     435  1.1  dholland 
     436  1.1  dholland 	newrp->rc_flag |= RC_INPROG;
                        	/*
                        	 * Store the client address used as part of the key; an address
                        	 * family other than AF_INET or AF_INET6 leaves the zeroed
                        	 * address untouched.
                        	 */
     437  1.1  dholland 	saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
     438  1.1  dholland 	if (saddr->sin_family == AF_INET)
     439  1.1  dholland 		newrp->rc_inet = saddr->sin_addr.s_addr;
     440  1.1  dholland 	else if (saddr->sin_family == AF_INET6) {
     441  1.1  dholland 		saddr6 = (struct sockaddr_in6 *)saddr;
     442  1.1  dholland 		NFSBCOPY((caddr_t)&saddr6->sin6_addr, (caddr_t)&newrp->rc_inet6,
     443  1.1  dholland 		    sizeof (struct in6_addr));
     444  1.1  dholland 		newrp->rc_flag |= RC_INETIPV6;
     445  1.1  dholland 	}
     446  1.1  dholland 	LIST_INSERT_HEAD(hp, newrp, rc_hash);
     447  1.1  dholland 	TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru);
     448  1.1  dholland 	mtx_unlock(mutex);
     449  1.1  dholland 	nd->nd_rp = newrp;
     450  1.1  dholland 	ret = RC_DOIT;
     451  1.1  dholland 
     452  1.1  dholland out:
     453  1.1  dholland 	NFSEXITCODE2(0, nd);
     454  1.1  dholland 	return (ret);
     455  1.1  dholland }
    456  1.1  dholland 
     457  1.1  dholland /*
     458  1.1  dholland  * Update a request cache entry after the rpc has been done
                         *
                         * Takes the entry from nd->nd_rp (panics when NULL or not RC_INPROG),
                         * clears nd->nd_rp and RC_INPROG, and then does one of:
                         *  - nd_repstat == NFSERR_REPLYFROMCACHE: replaces nd->nd_mreq with a
                         *    copy of the entry's saved reply and refreshes its timestamp;
                         *  - saves the reply in the entry, either as a bare status
                         *    (RC_REPSTATUS) or as a copy of nd->nd_mreq (RC_REPMBUF), and
                         *    timestamps it;
                         *  - otherwise frees the entry with nfsrc_freecache().
                         * nfsrc_trimcache() is called before returning on every path.
                         *
                         * Returns the entry only for a TCP entry whose reply was saved and
                         * whose rc_refcnt is 0; in that case the nfsrc_lock() taken here is
                         * not released, and nfsrvd_sentcache() does the unlock.  Returns NULL
                         * on every other path.
     459  1.1  dholland  */
     460  1.1  dholland APPLESTATIC struct nfsrvcache *
     461  1.1  dholland nfsrvd_updatecache(struct nfsrv_descript *nd, struct socket *so)
     462  1.1  dholland {
     463  1.1  dholland 	struct nfsrvcache *rp;
     464  1.1  dholland 	struct nfsrvcache *retrp = NULL;
     465  1.1  dholland 	mbuf_t m;
     466  1.1  dholland 	struct mtx *mutex;
     467  1.1  dholland 
     468  1.1  dholland 	rp = nd->nd_rp;
     469  1.1  dholland 	if (!rp)
     470  1.1  dholland 		panic("nfsrvd_updatecache null rp");
     471  1.1  dholland 	nd->nd_rp = NULL;
     472  1.1  dholland 	mutex = nfsrc_cachemutex(rp);
     473  1.1  dholland 	mtx_lock(mutex);
     474  1.1  dholland 	nfsrc_lock(rp);
     475  1.1  dholland 	if (!(rp->rc_flag & RC_INPROG))
     476  1.1  dholland 		panic("nfsrvd_updatecache not inprog");
     477  1.1  dholland 	rp->rc_flag &= ~RC_INPROG;
                        	/* UDP entries move to the LRU tail on every update. */
     478  1.1  dholland 	if (rp->rc_flag & RC_UDP) {
     479  1.1  dholland 		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
     480  1.1  dholland 		TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
     481  1.1  dholland 	}
     482  1.1  dholland 
     483  1.1  dholland 	/*
     484  1.1  dholland 	 * Reply from cache is a special case returned by nfsrv_checkseqid().
                        	 * The reply built so far is discarded and replaced by a copy of
                        	 * the saved one; the copy is made after the mutex is dropped.
     485  1.1  dholland 	 */
     486  1.1  dholland 	if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) {
     487  1.1  dholland 		newnfsstats.srvcache_nonidemdonehits++;
     488  1.1  dholland 		mtx_unlock(mutex);
     489  1.1  dholland 		nd->nd_repstat = 0;
     490  1.1  dholland 		if (nd->nd_mreq)
     491  1.1  dholland 			mbuf_freem(nd->nd_mreq);
     492  1.1  dholland 		if (!(rp->rc_flag & RC_REPMBUF))
     493  1.1  dholland 			panic("reply from cache");
     494  1.1  dholland 		nd->nd_mreq = m_copym(rp->rc_reply, 0,
     495  1.1  dholland 		    M_COPYALL, M_WAITOK);
     496  1.1  dholland 		rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
     497  1.1  dholland 		nfsrc_unlock(rp);
     498  1.1  dholland 		goto out;
     499  1.1  dholland 	}
     500  1.1  dholland 
     501  1.1  dholland 	/*
     502  1.1  dholland 	 * If rc_refcnt > 0, save it
     503  1.1  dholland 	 * For UDP, save it if ND_SAVEREPLY is set
     504  1.1  dholland 	 * For TCP, save it if ND_SAVEREPLY and nfsrc_tcpnonidempotent is set
                        	 * (and, for TCP, only while nfsrc_tcpsavedreplies has not passed
                        	 * nfsrc_floodlevel).  Nothing is saved when the reply status is
                        	 * NFSERR_DONTREPLY.
     505  1.1  dholland 	 */
     506  1.1  dholland 	if (nd->nd_repstat != NFSERR_DONTREPLY &&
     507  1.1  dholland 	    (rp->rc_refcnt > 0 ||
     508  1.1  dholland 	     ((nd->nd_flag & ND_SAVEREPLY) && (rp->rc_flag & RC_UDP)) ||
     509  1.1  dholland 	     ((nd->nd_flag & ND_SAVEREPLY) && !(rp->rc_flag & RC_UDP) &&
     510  1.1  dholland 	      nfsrc_tcpsavedreplies <= nfsrc_floodlevel &&
     511  1.1  dholland 	      nfsrc_tcpnonidempotent))) {
     512  1.1  dholland 		if (rp->rc_refcnt > 0) {
     513  1.1  dholland 			if (!(rp->rc_flag & RC_NFSV4))
     514  1.1  dholland 				panic("update_cache refcnt");
     515  1.1  dholland 			rp->rc_flag |= RC_REFCNT;
     516  1.1  dholland 		}
                        		/*
                        		 * V2 procedures flagged in nfsv2_repstat[] only need the
                        		 * status word cached; everything else gets a copy of the
                        		 * reply mbuf chain.
                        		 */
     517  1.1  dholland 		if ((nd->nd_flag & ND_NFSV2) &&
     518  1.1  dholland 		    nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) {
     519  1.1  dholland 			rp->rc_status = nd->nd_repstat;
     520  1.1  dholland 			rp->rc_flag |= RC_REPSTATUS;
     521  1.1  dholland 			mtx_unlock(mutex);
     522  1.1  dholland 		} else {
     523  1.1  dholland 			if (!(rp->rc_flag & RC_UDP)) {
     524  1.1  dholland 			    atomic_add_int(&nfsrc_tcpsavedreplies, 1);
     525  1.1  dholland 			    if (nfsrc_tcpsavedreplies >
     526  1.1  dholland 				newnfsstats.srvcache_tcppeak)
     527  1.1  dholland 				newnfsstats.srvcache_tcppeak =
     528  1.1  dholland 				    nfsrc_tcpsavedreplies;
     529  1.1  dholland 			}
                        			/*
                        			 * The mutex is released around the M_WAITOK copy
                        			 * and taken again to publish the saved reply.
                        			 */
     530  1.1  dholland 			mtx_unlock(mutex);
     531  1.1  dholland 			m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
     532  1.1  dholland 			mtx_lock(mutex);
     533  1.1  dholland 			rp->rc_reply = m;
     534  1.1  dholland 			rp->rc_flag |= RC_REPMBUF;
     535  1.1  dholland 			mtx_unlock(mutex);
     536  1.1  dholland 		}
     537  1.1  dholland 		if (rp->rc_flag & RC_UDP) {
     538  1.1  dholland 			rp->rc_timestamp = NFSD_MONOSEC +
     539  1.1  dholland 			    NFSRVCACHE_UDPTIMEOUT;
     540  1.1  dholland 			nfsrc_unlock(rp);
     541  1.1  dholland 		} else {
     542  1.1  dholland 			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
                        			/*
                        			 * With no references the entry stays locked and is
                        			 * handed back for nfsrvd_sentcache() to unlock.
                        			 */
     543  1.1  dholland 			if (rp->rc_refcnt > 0)
     544  1.1  dholland 				nfsrc_unlock(rp);
     545  1.1  dholland 			else
     546  1.1  dholland 				retrp = rp;
     547  1.1  dholland 		}
     548  1.1  dholland 	} else {
                        		/* Nothing worth keeping: free the entry under the mutex. */
     549  1.1  dholland 		nfsrc_freecache(rp);
     550  1.1  dholland 		mtx_unlock(mutex);
     551  1.1  dholland 	}
     552  1.1  dholland 
     553  1.1  dholland out:
     554  1.1  dholland 	nfsrc_trimcache(nd->nd_sockref, so);
     555  1.1  dholland 	NFSEXITCODE2(0, nd);
     556  1.1  dholland 	return (retrp);
     557  1.1  dholland }
    558  1.1  dholland 
     559  1.1  dholland /*
     560  1.1  dholland  * Invalidate and, if possible, free an in prog cache entry.
     561  1.1  dholland  * Must not sleep.
                         *
                         * Clears RC_INPROG under the entry's mutex and frees the entry only
                         * when rc_refcnt is 0 and RC_LOCKED is clear; otherwise the entry is
                         * left in place.  Panics when the entry is not RC_INPROG.
                         * NOTE(review): the RC_INPROG sanity check reads rc_flag before the
                         * mutex is taken.
     562  1.1  dholland  */
     563  1.1  dholland APPLESTATIC void
     564  1.1  dholland nfsrvd_delcache(struct nfsrvcache *rp)
     565  1.1  dholland {
     566  1.1  dholland 	struct mtx *mutex;
     567  1.1  dholland 
     568  1.1  dholland 	mutex = nfsrc_cachemutex(rp);
     569  1.1  dholland 	if (!(rp->rc_flag & RC_INPROG))
     570  1.1  dholland 		panic("nfsrvd_delcache not in prog");
     571  1.1  dholland 	mtx_lock(mutex);
     572  1.1  dholland 	rp->rc_flag &= ~RC_INPROG;
     573  1.1  dholland 	if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED))
     574  1.1  dholland 		nfsrc_freecache(rp);
     575  1.1  dholland 	mtx_unlock(mutex);
     576  1.1  dholland }
    577  1.1  dholland 
     578  1.1  dholland /*
     579  1.1  dholland  * Called after nfsrvd_updatecache() once the reply is sent, to update
     580  1.1  dholland  * the entry for nfsrc_activesocket() and unlock it. The argument is
     581  1.1  dholland  * the pointer returned by nfsrvd_updatecache().
                         *
                         * rp must still be RC_LOCKED (panic otherwise).  When err is 0 the
                         * socket must be TCP over AF_INET or AF_INET6 (panic otherwise); if
                         * nfsrv_getsockseqnum() returns non-zero, the sequence number it
                         * stored is recorded in rc_tcpseq and RC_TCPSEQ is set, under the
                         * entry's mutex.  When err is non-zero the socket is not examined.
                         * The entry is unlocked in every case.
     582  1.1  dholland  */
     583  1.1  dholland APPLESTATIC void
     584  1.1  dholland nfsrvd_sentcache(struct nfsrvcache *rp, struct socket *so, int err)
     585  1.1  dholland {
     586  1.1  dholland 	tcp_seq tmp_seq;
     587  1.1  dholland 	struct mtx *mutex;
     588  1.1  dholland 
     589  1.1  dholland 	mutex = nfsrc_cachemutex(rp);
     590  1.1  dholland 	if (!(rp->rc_flag & RC_LOCKED))
     591  1.1  dholland 		panic("nfsrvd_sentcache not locked");
     592  1.1  dholland 	if (!err) {
     593  1.1  dholland 		if ((so->so_proto->pr_domain->dom_family != AF_INET &&
     594  1.1  dholland 		     so->so_proto->pr_domain->dom_family != AF_INET6) ||
     595  1.1  dholland 		     so->so_proto->pr_protocol != IPPROTO_TCP)
     596  1.1  dholland 			panic("nfs sent cache");
     597  1.1  dholland 		if (nfsrv_getsockseqnum(so, &tmp_seq)) {
     598  1.1  dholland 			mtx_lock(mutex);
     599  1.1  dholland 			rp->rc_tcpseq = tmp_seq;
     600  1.1  dholland 			rp->rc_flag |= RC_TCPSEQ;
     601  1.1  dholland 			mtx_unlock(mutex);
     602  1.1  dholland 		}
     603  1.1  dholland 	}
     604  1.1  dholland 	nfsrc_unlock(rp);
     605  1.1  dholland }
    606  1.1  dholland 
    607  1.1  dholland /*
    608  1.1  dholland  * Get a cache entry for TCP
    609  1.1  dholland  * - key on <xid, nfs version>
    610  1.1  dholland  *   (allow multiple entries for a given key)
    611  1.1  dholland  */
    612  1.1  dholland static int
    613  1.1  dholland nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
    614  1.1  dholland {
    615  1.1  dholland 	struct nfsrvcache *rp, *nextrp;
    616  1.1  dholland 	int i;
    617  1.1  dholland 	struct nfsrvcache *hitrp;
    618  1.1  dholland 	struct nfsrvhashhead *hp, nfsrc_templist;
    619  1.1  dholland 	int hit, ret = 0;
    620  1.1  dholland 	struct mtx *mutex;
    621  1.1  dholland 
    622  1.1  dholland 	mutex = nfsrc_cachemutex(newrp);
    623  1.1  dholland 	hp = NFSRCHASH(newrp->rc_xid);
    624  1.1  dholland 	newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum);
    625  1.1  dholland tryagain:
    626  1.1  dholland 	mtx_lock(mutex);
    627  1.1  dholland 	hit = 1;
    628  1.1  dholland 	LIST_INIT(&nfsrc_templist);
    629  1.1  dholland 	/*
    630  1.1  dholland 	 * Get all the matches and put them on the temp list.
    631  1.1  dholland 	 */
    632  1.1  dholland 	rp = LIST_FIRST(hp);
    633  1.1  dholland 	while (rp != LIST_END(hp)) {
    634  1.1  dholland 		nextrp = LIST_NEXT(rp, rc_hash);
    635  1.1  dholland 		if (newrp->rc_xid == rp->rc_xid &&
    636  1.1  dholland 		    (!(rp->rc_flag & RC_INPROG) ||
    637  1.1  dholland 		     ((newrp->rc_flag & RC_SAMETCPCONN) &&
    638  1.1  dholland 		      newrp->rc_sockref == rp->rc_sockref)) &&
    639  1.1  dholland 		    (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
    640  1.1  dholland 		    newrp->rc_proc == rp->rc_proc &&
    641  1.1  dholland 		    ((newrp->rc_flag & RC_NFSV4) &&
    642  1.1  dholland 		     newrp->rc_sockref != rp->rc_sockref &&
    643  1.1  dholland 		     newrp->rc_cachetime >= rp->rc_cachetime)
    644  1.1  dholland 		    && newrp->rc_reqlen == rp->rc_reqlen &&
    645  1.1  dholland 		    newrp->rc_cksum == rp->rc_cksum) {
    646  1.1  dholland 			LIST_REMOVE(rp, rc_hash);
    647  1.1  dholland 			LIST_INSERT_HEAD(&nfsrc_templist, rp, rc_hash);
    648  1.1  dholland 		}
    649  1.1  dholland 		rp = nextrp;
    650  1.1  dholland 	}
    651  1.1  dholland 
    652  1.1  dholland 	/*
    653  1.1  dholland 	 * Now, use nfsrc_templist to decide if there is a match.
    654  1.1  dholland 	 */
    655  1.1  dholland 	i = 0;
    656  1.1  dholland 	LIST_FOREACH(rp, &nfsrc_templist, rc_hash) {
    657  1.1  dholland 		i++;
    658  1.1  dholland 		if (rp->rc_refcnt > 0) {
    659  1.1  dholland 			hit = 0;
    660  1.1  dholland 			break;
    661  1.1  dholland 		}
    662  1.1  dholland 	}
    663  1.1  dholland 	/*
    664  1.1  dholland 	 * Can be a hit only if one entry left.
    665  1.1  dholland 	 * Note possible hit entry and put nfsrc_templist back on hash
    666  1.1  dholland 	 * list.
    667  1.1  dholland 	 */
    668  1.1  dholland 	if (i != 1)
    669  1.1  dholland 		hit = 0;
    670  1.1  dholland 	hitrp = rp = LIST_FIRST(&nfsrc_templist);
    671  1.1  dholland 	while (rp != LIST_END(&nfsrc_templist)) {
    672  1.1  dholland 		nextrp = LIST_NEXT(rp, rc_hash);
    673  1.1  dholland 		LIST_REMOVE(rp, rc_hash);
    674  1.1  dholland 		LIST_INSERT_HEAD(hp, rp, rc_hash);
    675  1.1  dholland 		rp = nextrp;
    676  1.1  dholland 	}
    677  1.1  dholland 	if (LIST_FIRST(&nfsrc_templist) != LIST_END(&nfsrc_templist))
    678  1.1  dholland 		panic("nfs gettcp cache templist");
    679  1.1  dholland 
    680  1.1  dholland 	if (hit) {
    681  1.1  dholland 		rp = hitrp;
    682  1.1  dholland 		if ((rp->rc_flag & RC_LOCKED) != 0) {
    683  1.1  dholland 			rp->rc_flag |= RC_WANTED;
    684  1.1  dholland 			(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
    685  1.1  dholland 			    "nfsrc", 10 * hz);
    686  1.1  dholland 			goto tryagain;
    687  1.1  dholland 		}
    688  1.1  dholland 		if (rp->rc_flag == 0)
    689  1.1  dholland 			panic("nfs tcp cache0");
    690  1.1  dholland 		rp->rc_flag |= RC_LOCKED;
    691  1.1  dholland 		if (rp->rc_flag & RC_INPROG) {
    692  1.1  dholland 			newnfsstats.srvcache_inproghits++;
    693  1.1  dholland 			mtx_unlock(mutex);
    694  1.1  dholland 			if (newrp->rc_sockref == rp->rc_sockref)
    695  1.1  dholland 				nfsrc_marksametcpconn(rp->rc_sockref);
    696  1.1  dholland 			ret = RC_DROPIT;
    697  1.1  dholland 		} else if (rp->rc_flag & RC_REPSTATUS) {
    698  1.1  dholland 			/*
    699  1.1  dholland 			 * V2 only.
    700  1.1  dholland 			 */
    701  1.1  dholland 			newnfsstats.srvcache_nonidemdonehits++;
    702  1.1  dholland 			mtx_unlock(mutex);
    703  1.1  dholland 			if (newrp->rc_sockref == rp->rc_sockref)
    704  1.1  dholland 				nfsrc_marksametcpconn(rp->rc_sockref);
    705  1.1  dholland 			ret = RC_REPLY;
    706  1.1  dholland 			nfsrvd_rephead(nd);
    707  1.1  dholland 			*(nd->nd_errp) = rp->rc_status;
    708  1.1  dholland 			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
    709  1.1  dholland 		} else if (rp->rc_flag & RC_REPMBUF) {
    710  1.1  dholland 			newnfsstats.srvcache_nonidemdonehits++;
    711  1.1  dholland 			mtx_unlock(mutex);
    712  1.1  dholland 			if (newrp->rc_sockref == rp->rc_sockref)
    713  1.1  dholland 				nfsrc_marksametcpconn(rp->rc_sockref);
    714  1.1  dholland 			ret = RC_REPLY;
    715  1.1  dholland 			nd->nd_mreq = m_copym(rp->rc_reply, 0,
    716  1.1  dholland 				M_COPYALL, M_WAITOK);
    717  1.1  dholland 			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
    718  1.1  dholland 		} else {
    719  1.1  dholland 			panic("nfs tcp cache1");
    720  1.1  dholland 		}
    721  1.1  dholland 		nfsrc_unlock(rp);
    722  1.1  dholland 		free((caddr_t)newrp, M_NFSRVCACHE);
    723  1.1  dholland 		goto out;
    724  1.1  dholland 	}
    725  1.1  dholland 	newnfsstats.srvcache_misses++;
    726  1.1  dholland 	atomic_add_int(&newnfsstats.srvcache_size, 1);
    727  1.1  dholland 
    728  1.1  dholland 	/*
    729  1.1  dholland 	 * For TCP, multiple entries for a key are allowed, so don't
    730  1.1  dholland 	 * chain it into the hash table until done.
    731  1.1  dholland 	 */
    732  1.1  dholland 	newrp->rc_cachetime = NFSD_MONOSEC;
    733  1.1  dholland 	newrp->rc_flag |= RC_INPROG;
    734  1.1  dholland 	LIST_INSERT_HEAD(hp, newrp, rc_hash);
    735  1.1  dholland 	mtx_unlock(mutex);
    736  1.1  dholland 	nd->nd_rp = newrp;
    737  1.1  dholland 	ret = RC_DOIT;
    738  1.1  dholland 
    739  1.1  dholland out:
    740  1.1  dholland 	NFSEXITCODE2(0, nd);
    741  1.1  dholland 	return (ret);
    742  1.1  dholland }
    743  1.1  dholland 
    744  1.1  dholland /*
    745  1.1  dholland  * Lock a cache entry.
    746  1.1  dholland  */
    747  1.1  dholland static void
    748  1.1  dholland nfsrc_lock(struct nfsrvcache *rp)
    749  1.1  dholland {
    750  1.1  dholland 	struct mtx *mutex;
    751  1.1  dholland 
    752  1.1  dholland 	mutex = nfsrc_cachemutex(rp);
    753  1.1  dholland 	mtx_assert(mutex, MA_OWNED);
    754  1.1  dholland 	while ((rp->rc_flag & RC_LOCKED) != 0) {
    755  1.1  dholland 		rp->rc_flag |= RC_WANTED;
    756  1.1  dholland 		(void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0);
    757  1.1  dholland 	}
    758  1.1  dholland 	rp->rc_flag |= RC_LOCKED;
    759  1.1  dholland }
    760  1.1  dholland 
    761  1.1  dholland /*
    762  1.1  dholland  * Unlock a cache entry.
    763  1.1  dholland  */
    764  1.1  dholland static void
    765  1.1  dholland nfsrc_unlock(struct nfsrvcache *rp)
    766  1.1  dholland {
    767  1.1  dholland 	struct mtx *mutex;
    768  1.1  dholland 
    769  1.1  dholland 	mutex = nfsrc_cachemutex(rp);
    770  1.1  dholland 	mtx_lock(mutex);
    771  1.1  dholland 	rp->rc_flag &= ~RC_LOCKED;
    772  1.1  dholland 	nfsrc_wanted(rp);
    773  1.1  dholland 	mtx_unlock(mutex);
    774  1.1  dholland }
    775  1.1  dholland 
    776  1.1  dholland /*
    777  1.1  dholland  * Wakeup anyone wanting entry.
    778  1.1  dholland  */
    779  1.1  dholland static void
    780  1.1  dholland nfsrc_wanted(struct nfsrvcache *rp)
    781  1.1  dholland {
    782  1.1  dholland 	if (rp->rc_flag & RC_WANTED) {
    783  1.1  dholland 		rp->rc_flag &= ~RC_WANTED;
    784  1.1  dholland 		wakeup((caddr_t)rp);
    785  1.1  dholland 	}
    786  1.1  dholland }
    787  1.1  dholland 
    788  1.1  dholland /*
    789  1.1  dholland  * Free up the entry.
    790  1.1  dholland  * Must not sleep.
    791  1.1  dholland  */
    792  1.1  dholland static void
    793  1.1  dholland nfsrc_freecache(struct nfsrvcache *rp)
    794  1.1  dholland {
    795  1.1  dholland 
    796  1.1  dholland 	LIST_REMOVE(rp, rc_hash);
    797  1.1  dholland 	if (rp->rc_flag & RC_UDP) {
    798  1.1  dholland 		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
    799  1.1  dholland 		nfsrc_udpcachesize--;
    800  1.1  dholland 	}
    801  1.1  dholland 	nfsrc_wanted(rp);
    802  1.1  dholland 	if (rp->rc_flag & RC_REPMBUF) {
    803  1.1  dholland 		mbuf_freem(rp->rc_reply);
    804  1.1  dholland 		if (!(rp->rc_flag & RC_UDP))
    805  1.1  dholland 			atomic_add_int(&nfsrc_tcpsavedreplies, -1);
    806  1.1  dholland 	}
    807  1.1  dholland 	FREE((caddr_t)rp, M_NFSRVCACHE);
    808  1.1  dholland 	atomic_add_int(&newnfsstats.srvcache_size, -1);
    809  1.1  dholland }
    810  1.1  dholland 
    811  1.1  dholland /*
    812  1.1  dholland  * Clean out the cache. Called when nfsserver module is unloaded.
    813  1.1  dholland  */
    814  1.1  dholland APPLESTATIC void
    815  1.1  dholland nfsrvd_cleancache(void)
    816  1.1  dholland {
    817  1.1  dholland 	struct nfsrvcache *rp, *nextrp;
    818  1.1  dholland 	int i;
    819  1.1  dholland 
    820  1.1  dholland 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
    821  1.1  dholland 		mtx_lock(&nfsrchash_table[i].mtx);
    822  1.1  dholland 		LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, nextrp)
    823  1.1  dholland 			nfsrc_freecache(rp);
    824  1.1  dholland 		mtx_unlock(&nfsrchash_table[i].mtx);
    825  1.1  dholland 	}
    826  1.1  dholland 	mtx_lock(&nfsrc_udpmtx);
    827  1.1  dholland 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
    828  1.1  dholland 		LIST_FOREACH_SAFE(rp, &nfsrvudphashtbl[i], rc_hash, nextrp) {
    829  1.1  dholland 			nfsrc_freecache(rp);
    830  1.1  dholland 		}
    831  1.1  dholland 	}
    832  1.1  dholland 	newnfsstats.srvcache_size = 0;
    833  1.1  dholland 	mtx_unlock(&nfsrc_udpmtx);
    834  1.1  dholland 	nfsrc_tcpsavedreplies = 0;
    835  1.1  dholland }
    836  1.1  dholland 
    837  1.1  dholland /*
    838  1.1  dholland  * The basic rule is to get rid of entries that are expired.
    839  1.1  dholland  */
    840  1.1  dholland static void
    841  1.1  dholland nfsrc_trimcache(u_int64_t sockref, struct socket *so)
    842  1.1  dholland {
    843  1.1  dholland 	struct nfsrvcache *rp, *nextrp;
    844  1.1  dholland 	int i, j, k, time_histo[10];
    845  1.1  dholland 	time_t thisstamp;
    846  1.1  dholland 	static time_t udp_lasttrim = 0, tcp_lasttrim = 0;
    847  1.1  dholland 	static int onethread = 0;
    848  1.1  dholland 
    849  1.1  dholland 	if (atomic_cmpset_acq_int(&onethread, 0, 1) == 0)
    850  1.1  dholland 		return;
    851  1.1  dholland 	if (NFSD_MONOSEC != udp_lasttrim ||
    852  1.1  dholland 	    nfsrc_udpcachesize >= (nfsrc_udphighwater +
    853  1.1  dholland 	    nfsrc_udphighwater / 2)) {
    854  1.1  dholland 		mtx_lock(&nfsrc_udpmtx);
    855  1.1  dholland 		udp_lasttrim = NFSD_MONOSEC;
    856  1.1  dholland 		TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) {
    857  1.1  dholland 			if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
    858  1.1  dholland 			     && rp->rc_refcnt == 0
    859  1.1  dholland 			     && ((rp->rc_flag & RC_REFCNT) ||
    860  1.1  dholland 				 udp_lasttrim > rp->rc_timestamp ||
    861  1.1  dholland 				 nfsrc_udpcachesize > nfsrc_udphighwater))
    862  1.1  dholland 				nfsrc_freecache(rp);
    863  1.1  dholland 		}
    864  1.1  dholland 		mtx_unlock(&nfsrc_udpmtx);
    865  1.1  dholland 	}
    866  1.1  dholland 	if (NFSD_MONOSEC != tcp_lasttrim ||
    867  1.1  dholland 	    nfsrc_tcpsavedreplies >= nfsrc_tcphighwater) {
    868  1.1  dholland 		for (i = 0; i < 10; i++)
    869  1.1  dholland 			time_histo[i] = 0;
    870  1.1  dholland 		for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
    871  1.1  dholland 			mtx_lock(&nfsrchash_table[i].mtx);
    872  1.1  dholland 			if (i == 0)
    873  1.1  dholland 				tcp_lasttrim = NFSD_MONOSEC;
    874  1.1  dholland 			LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash,
    875  1.1  dholland 			    nextrp) {
    876  1.1  dholland 				if (!(rp->rc_flag &
    877  1.1  dholland 				     (RC_INPROG|RC_LOCKED|RC_WANTED))
    878  1.1  dholland 				     && rp->rc_refcnt == 0) {
    879  1.1  dholland 					/*
    880  1.1  dholland 					 * The timestamps range from roughly the
    881  1.1  dholland 					 * present (tcp_lasttrim) to the present
    882  1.1  dholland 					 * + nfsrc_tcptimeout. Generate a simple
    883  1.1  dholland 					 * histogram of where the timeouts fall.
    884  1.1  dholland 					 */
    885  1.1  dholland 					j = rp->rc_timestamp - tcp_lasttrim;
    886  1.1  dholland 					if (j >= nfsrc_tcptimeout)
    887  1.1  dholland 						j = nfsrc_tcptimeout - 1;
    888  1.1  dholland 					if (j < 0)
    889  1.1  dholland 						j = 0;
    890  1.1  dholland 					j = (j * 10 / nfsrc_tcptimeout) % 10;
    891  1.1  dholland 					time_histo[j]++;
    892  1.1  dholland 					if ((rp->rc_flag & RC_REFCNT) ||
    893  1.1  dholland 					    tcp_lasttrim > rp->rc_timestamp ||
    894  1.1  dholland 					    nfsrc_activesocket(rp, sockref, so))
    895  1.1  dholland 						nfsrc_freecache(rp);
    896  1.1  dholland 				}
    897  1.1  dholland 			}
    898  1.1  dholland 			mtx_unlock(&nfsrchash_table[i].mtx);
    899  1.1  dholland 		}
    900  1.1  dholland 		j = nfsrc_tcphighwater / 5;	/* 20% of it */
    901  1.1  dholland 		if (j > 0 && (nfsrc_tcpsavedreplies + j) > nfsrc_tcphighwater) {
    902  1.1  dholland 			/*
    903  1.1  dholland 			 * Trim some more with a smaller timeout of as little
    904  1.1  dholland 			 * as 20% of nfsrc_tcptimeout to try and get below
    905  1.1  dholland 			 * 80% of the nfsrc_tcphighwater.
    906  1.1  dholland 			 */
    907  1.1  dholland 			k = 0;
    908  1.1  dholland 			for (i = 0; i < 8; i++) {
    909  1.1  dholland 				k += time_histo[i];
    910  1.1  dholland 				if (k > j)
    911  1.1  dholland 					break;
    912  1.1  dholland 			}
    913  1.1  dholland 			k = nfsrc_tcptimeout * (i + 1) / 10;
    914  1.1  dholland 			if (k < 1)
    915  1.1  dholland 				k = 1;
    916  1.1  dholland 			thisstamp = tcp_lasttrim + k;
    917  1.1  dholland 			for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
    918  1.1  dholland 				mtx_lock(&nfsrchash_table[i].mtx);
    919  1.1  dholland 				LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl,
    920  1.1  dholland 				    rc_hash, nextrp) {
    921  1.1  dholland 					if (!(rp->rc_flag &
    922  1.1  dholland 					     (RC_INPROG|RC_LOCKED|RC_WANTED))
    923  1.1  dholland 					     && rp->rc_refcnt == 0
    924  1.1  dholland 					     && ((rp->rc_flag & RC_REFCNT) ||
    925  1.1  dholland 						 thisstamp > rp->rc_timestamp ||
    926  1.1  dholland 						 nfsrc_activesocket(rp, sockref,
    927  1.1  dholland 						    so)))
    928  1.1  dholland 						nfsrc_freecache(rp);
    929  1.1  dholland 				}
    930  1.1  dholland 				mtx_unlock(&nfsrchash_table[i].mtx);
    931  1.1  dholland 			}
    932  1.1  dholland 		}
    933  1.1  dholland 	}
    934  1.1  dholland 	atomic_store_rel_int(&onethread, 0);
    935  1.1  dholland }
    936  1.1  dholland 
    937  1.1  dholland /*
    938  1.1  dholland  * Add a seqid# reference to the cache entry.
    939  1.1  dholland  */
    940  1.1  dholland APPLESTATIC void
    941  1.1  dholland nfsrvd_refcache(struct nfsrvcache *rp)
    942  1.1  dholland {
    943  1.1  dholland 	struct mtx *mutex;
    944  1.1  dholland 
    945  1.1  dholland 	mutex = nfsrc_cachemutex(rp);
    946  1.1  dholland 	mtx_lock(mutex);
    947  1.1  dholland 	if (rp->rc_refcnt < 0)
    948  1.1  dholland 		panic("nfs cache refcnt");
    949  1.1  dholland 	rp->rc_refcnt++;
    950  1.1  dholland 	mtx_unlock(mutex);
    951  1.1  dholland }
    952  1.1  dholland 
    953  1.1  dholland /*
    954  1.1  dholland  * Dereference a seqid# cache entry.
    955  1.1  dholland  */
    956  1.1  dholland APPLESTATIC void
    957  1.1  dholland nfsrvd_derefcache(struct nfsrvcache *rp)
    958  1.1  dholland {
    959  1.1  dholland 	struct mtx *mutex;
    960  1.1  dholland 
    961  1.1  dholland 	mutex = nfsrc_cachemutex(rp);
    962  1.1  dholland 	mtx_lock(mutex);
    963  1.1  dholland 	if (rp->rc_refcnt <= 0)
    964  1.1  dholland 		panic("nfs cache derefcnt");
    965  1.1  dholland 	rp->rc_refcnt--;
    966  1.1  dholland 	if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG)))
    967  1.1  dholland 		nfsrc_freecache(rp);
    968  1.1  dholland 	mtx_unlock(mutex);
    969  1.1  dholland }
    970  1.1  dholland 
    971  1.1  dholland /*
    972  1.1  dholland  * Check to see if the socket is active.
    973  1.1  dholland  * Return 1 if the reply has been received/acknowledged by the client,
    974  1.1  dholland  * 0 otherwise.
    975  1.1  dholland  * XXX - Uses tcp internals.
    976  1.1  dholland  */
    977  1.1  dholland static int
    978  1.1  dholland nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t cur_sockref,
    979  1.1  dholland     struct socket *cur_so)
    980  1.1  dholland {
    981  1.1  dholland 	int ret = 0;
    982  1.1  dholland 
    983  1.1  dholland 	if (!(rp->rc_flag & RC_TCPSEQ))
    984  1.1  dholland 		return (ret);
    985  1.1  dholland 	/*
    986  1.1  dholland 	 * If the sockref is the same, it is the same TCP connection.
    987  1.1  dholland 	 */
    988  1.1  dholland 	if (cur_sockref == rp->rc_sockref)
    989  1.1  dholland 		ret = nfsrv_checksockseqnum(cur_so, rp->rc_tcpseq);
    990  1.1  dholland 	return (ret);
    991  1.1  dholland }
    992  1.1  dholland 
    993  1.1  dholland /*
    994  1.1  dholland  * Calculate the length of the mbuf list and a checksum on the first up to
    995  1.1  dholland  * NFSRVCACHE_CHECKLEN bytes.
    996  1.1  dholland  */
    997  1.1  dholland static int
    998  1.1  dholland nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum)
    999  1.1  dholland {
   1000  1.1  dholland 	int len = 0, cklen;
   1001  1.1  dholland 	mbuf_t m;
   1002  1.1  dholland 
   1003  1.1  dholland 	m = m1;
   1004  1.1  dholland 	while (m) {
   1005  1.1  dholland 		len += mbuf_len(m);
   1006  1.1  dholland 		m = mbuf_next(m);
   1007  1.1  dholland 	}
   1008  1.1  dholland 	cklen = (len > NFSRVCACHE_CHECKLEN) ? NFSRVCACHE_CHECKLEN : len;
   1009  1.1  dholland 	*cksum = in_cksum(m1, cklen);
   1010  1.1  dholland 	return (len);
   1011  1.1  dholland }
   1012  1.1  dholland 
   1013  1.1  dholland /*
   1014  1.1  dholland  * Mark a TCP connection that is seeing retries. Should never happen for
   1015  1.1  dholland  * NFSv4.
   1016  1.1  dholland  */
   1017  1.1  dholland static void
   1018  1.1  dholland nfsrc_marksametcpconn(u_int64_t sockref)
   1019  1.1  dholland {
   1020  1.1  dholland }
   1021  1.1  dholland 
   1022