Home | History | Annotate | Line # | Download | only in server
nfs_nfsdcache.c revision 1.2.10.2
      1  1.2.10.2  tls /*	$NetBSD: nfs_nfsdcache.c,v 1.2.10.2 2014/08/20 00:04:27 tls Exp $	*/
      2  1.2.10.2  tls /*-
      3  1.2.10.2  tls  * Copyright (c) 1989, 1993
      4  1.2.10.2  tls  *	The Regents of the University of California.  All rights reserved.
      5  1.2.10.2  tls  *
      6  1.2.10.2  tls  * This code is derived from software contributed to Berkeley by
      7  1.2.10.2  tls  * Rick Macklem at The University of Guelph.
      8  1.2.10.2  tls  *
      9  1.2.10.2  tls  * Redistribution and use in source and binary forms, with or without
     10  1.2.10.2  tls  * modification, are permitted provided that the following conditions
     11  1.2.10.2  tls  * are met:
     12  1.2.10.2  tls  * 1. Redistributions of source code must retain the above copyright
     13  1.2.10.2  tls  *    notice, this list of conditions and the following disclaimer.
     14  1.2.10.2  tls  * 2. Redistributions in binary form must reproduce the above copyright
     15  1.2.10.2  tls  *    notice, this list of conditions and the following disclaimer in the
     16  1.2.10.2  tls  *    documentation and/or other materials provided with the distribution.
     17  1.2.10.2  tls  * 4. Neither the name of the University nor the names of its contributors
     18  1.2.10.2  tls  *    may be used to endorse or promote products derived from this software
     19  1.2.10.2  tls  *    without specific prior written permission.
     20  1.2.10.2  tls  *
     21  1.2.10.2  tls  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     22  1.2.10.2  tls  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     23  1.2.10.2  tls  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     24  1.2.10.2  tls  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     25  1.2.10.2  tls  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     26  1.2.10.2  tls  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     27  1.2.10.2  tls  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     28  1.2.10.2  tls  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     29  1.2.10.2  tls  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     30  1.2.10.2  tls  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     31  1.2.10.2  tls  * SUCH DAMAGE.
     32  1.2.10.2  tls  *
     33  1.2.10.2  tls  */
     34  1.2.10.2  tls 
     35  1.2.10.2  tls #include <sys/cdefs.h>
     36  1.2.10.2  tls /* __FBSDID("FreeBSD: head/sys/fs/nfsserver/nfs_nfsdcache.c 254337 2013-08-14 21:11:26Z rmacklem "); */
     37  1.2.10.2  tls __RCSID("$NetBSD: nfs_nfsdcache.c,v 1.2.10.2 2014/08/20 00:04:27 tls Exp $");
     38  1.2.10.2  tls 
     39  1.2.10.2  tls /*
     40  1.2.10.2  tls  * Here is the basic algorithm:
     41  1.2.10.2  tls  * First, some design criteria I used:
     42  1.2.10.2  tls  * - I think a false hit is more serious than a false miss
     43  1.2.10.2  tls  * - A false hit for an RPC that has Op(s) that order via seqid# must be
     44  1.2.10.2  tls  *   avoided at all cost
     45  1.2.10.2  tls  * - A valid hit will probably happen a long time after the original reply
     46  1.2.10.2  tls  *   and the TCP socket that the original request was received on will no
     47  1.2.10.2  tls  *   longer be active
     48  1.2.10.2  tls  *   (The long time delay implies to me that LRU is not appropriate.)
     49  1.2.10.2  tls  * - The mechanism will satisfy the requirements of ordering Ops with seqid#s
     50  1.2.10.2  tls  *   in them as well as minimizing the risk of redoing retried non-idempotent
     51  1.2.10.2  tls  *   Ops.
     52  1.2.10.2  tls  * Because it is biased towards avoiding false hits, multiple entries with
     53  1.2.10.2  tls  * the same xid are to be expected, especially for the case of the entry
     54  1.2.10.2  tls  * in the cache being related to a seqid# sequenced Op.
     55  1.2.10.2  tls  *
     56  1.2.10.2  tls  * The basic algorithm I'm about to code up:
     57  1.2.10.2  tls  * - Null RPCs bypass the cache and are just done
     58  1.2.10.2  tls  * For TCP
     59  1.2.10.2  tls  * 	- key on <xid, NFS version> (as noted above, there can be several
     60  1.2.10.2  tls  * 				     entries with the same key)
     61  1.2.10.2  tls  * 	When a request arrives:
     62  1.2.10.2  tls  * 		For all that match key
     63  1.2.10.2  tls  * 		- if RPC# differs OR request_size differs
     64  1.2.10.2  tls  * 			- not a match with this one
     65  1.2.10.2  tls  * 		- if NFSv4 and received on same TCP socket OR
     66  1.2.10.2  tls  *			received on a TCP connection created before the
     67  1.2.10.2  tls  *			entry was cached
     68  1.2.10.2  tls  * 			- not a match with this one
     69  1.2.10.2  tls  * 			(V2,3 clients might retry on same TCP socket)
     70  1.2.10.2  tls  * 		- calculate checksum on first N bytes of NFS XDR
     71  1.2.10.2  tls  * 		- if checksum differs
     72  1.2.10.2  tls  * 			- not a match for this one
     73  1.2.10.2  tls  * 		If any of the remaining ones that match has a
     74  1.2.10.2  tls  * 			seqid_refcnt > 0
     75  1.2.10.2  tls  * 			- not a match (go do RPC, using new cache entry)
     76  1.2.10.2  tls  * 		If one match left
     77  1.2.10.2  tls  * 			- a hit (reply from cache)
     78  1.2.10.2  tls  * 		else
     79  1.2.10.2  tls  * 			- miss (go do RPC, using new cache entry)
     80  1.2.10.2  tls  *
     81  1.2.10.2  tls  * 	During processing of NFSv4 request:
     82  1.2.10.2  tls  * 		- set a flag when a non-idempotent Op is processed
     83  1.2.10.2  tls  * 		- when an Op that uses a seqid# (Open,...) is processed
     84  1.2.10.2  tls  * 			- if same seqid# as referenced entry in cache
     85  1.2.10.2  tls  * 				- free new cache entry
     86  1.2.10.2  tls  * 				- reply from referenced cache entry
     87  1.2.10.2  tls  * 			  else if next seqid# in order
     88  1.2.10.2  tls  * 				- free referenced cache entry
     89  1.2.10.2  tls  * 				- increment seqid_refcnt on new cache entry
     90  1.2.10.2  tls  * 				- set pointer from Openowner/Lockowner to
     91  1.2.10.2  tls  * 					new cache entry (aka reference it)
     92  1.2.10.2  tls  * 			  else if first seqid# in sequence
     93  1.2.10.2  tls  * 				- increment seqid_refcnt on new cache entry
     94  1.2.10.2  tls  * 				- set pointer from Openowner/Lockowner to
     95  1.2.10.2  tls  * 					new cache entry (aka reference it)
     96  1.2.10.2  tls  *
     97  1.2.10.2  tls  * 	At end of RPC processing:
     98  1.2.10.2  tls  * 		- if seqid_refcnt > 0 OR flagged non-idempotent on new
     99  1.2.10.2  tls  * 			cache entry
    100  1.2.10.2  tls  * 			- save reply in cache entry
    101  1.2.10.2  tls  * 			- calculate checksum on first N bytes of NFS XDR
    102  1.2.10.2  tls  * 				request
    103  1.2.10.2  tls  * 			- note op and length of XDR request (in bytes)
    104  1.2.10.2  tls  * 			- timestamp it
    105  1.2.10.2  tls  * 		  else
    106  1.2.10.2  tls  * 			- free new cache entry
    107  1.2.10.2  tls  * 		- Send reply (noting info for socket activity check, below)
    108  1.2.10.2  tls  *
    109  1.2.10.2  tls  * 	For cache entries saved above:
    110  1.2.10.2  tls  * 		- if saved since seqid_refcnt was > 0
    111  1.2.10.2  tls  * 			- free when seqid_refcnt decrements to 0
    112  1.2.10.2  tls  * 			  (when next one in sequence is processed above, or
    113  1.2.10.2  tls  * 			   when Openowner/Lockowner is discarded)
    114  1.2.10.2  tls  * 		  else { non-idempotent Op(s) }
    115  1.2.10.2  tls  * 			- free when
    116  1.2.10.2  tls  * 				- some further activity observed on same
    117  1.2.10.2  tls  * 					socket
    118  1.2.10.2  tls  * 				  (I'm not yet sure how I'm going to do
    119  1.2.10.2  tls  * 				   this. Maybe look at the TCP connection
    120  1.2.10.2  tls  * 				   to see if the send_tcp_sequence# is well
    121  1.2.10.2  tls  * 				   past sent reply OR K additional RPCs
    122  1.2.10.2  tls  * 				   replied on same socket OR?)
    123  1.2.10.2  tls  * 			  OR
    124  1.2.10.2  tls  * 				- when very old (hours, days, weeks?)
    125  1.2.10.2  tls  *
    126  1.2.10.2  tls  * For UDP (v2, 3 only), pretty much the old way:
    127  1.2.10.2  tls  * - key on <xid, NFS version, RPC#, Client host ip#>
    128  1.2.10.2  tls  *   (at most one entry for each key)
    129  1.2.10.2  tls  *
    130  1.2.10.2  tls  * When a Request arrives:
    131  1.2.10.2  tls  * - if a match with entry via key
    132  1.2.10.2  tls  * 	- if RPC marked In_progress
    133  1.2.10.2  tls  * 		- discard request (don't send reply)
    134  1.2.10.2  tls  * 	  else
    135  1.2.10.2  tls  * 		- reply from cache
    136  1.2.10.2  tls  * 		- timestamp cache entry
    137  1.2.10.2  tls  *   else
    138  1.2.10.2  tls  * 	- add entry to cache, marked In_progress
    139  1.2.10.2  tls  * 	- do RPC
    140  1.2.10.2  tls  * 	- when RPC done
    141  1.2.10.2  tls  * 		- if RPC# non-idempotent
    142  1.2.10.2  tls  * 			- mark entry Done (not In_progress)
    143  1.2.10.2  tls  * 			- save reply
    144  1.2.10.2  tls  * 			- timestamp cache entry
    145  1.2.10.2  tls  * 		  else
    146  1.2.10.2  tls  * 			- free cache entry
    147  1.2.10.2  tls  * 		- send reply
    148  1.2.10.2  tls  *
    149  1.2.10.2  tls  * Later, entries with saved replies are free'd a short time (few minutes)
    150  1.2.10.2  tls  * after reply sent (timestamp).
    151  1.2.10.2  tls  * Reference: Chet Juszczak, "Improving the Performance and Correctness
    152  1.2.10.2  tls  *		of an NFS Server", in Proc. Winter 1989 USENIX Conference,
    153  1.2.10.2  tls  *		pages 53-63. San Diego, February 1989.
    154  1.2.10.2  tls  *	 for the UDP case.
    155  1.2.10.2  tls  * nfsrc_floodlevel is set to the allowable upper limit for saved replies
    156  1.2.10.2  tls  *	for TCP. For V3, a reply won't be saved when the flood level is
    157  1.2.10.2  tls  *	hit. For V4, the non-idempotent Op will return NFSERR_RESOURCE in
    158  1.2.10.2  tls  *	that case. This level should be set high enough that this almost
    159  1.2.10.2  tls  *	never happens.
    160  1.2.10.2  tls  */
    161  1.2.10.2  tls #ifndef APPLEKEXT
    162  1.2.10.2  tls #include <fs/nfs/nfsport.h>
    163  1.2.10.2  tls 
    164  1.2.10.2  tls extern struct nfsstats newnfsstats;	/* srvcache_* counters updated below */
    165  1.2.10.2  tls extern struct mtx nfsrc_udpmtx;	/* mutex used for every RC_UDP entry */
    166  1.2.10.2  tls extern struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];	/* non-UDP buckets: .tbl list + .mtx */
    167  1.2.10.2  tls int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0;	/* cap on, and count of, saved TCP replies */
    168  1.2.10.2  tls #endif	/* !APPLEKEXT */
    169  1.2.10.2  tls 
    170  1.2.10.2  tls SYSCTL_DECL(_vfs_nfsd);
    171  1.2.10.2  tls /* TCP high water mark and its sysctl handler (registered below as "tcphighwater"). */
    172  1.2.10.2  tls static u_int	nfsrc_tcphighwater = 0;
    173  1.2.10.2  tls static int
    174  1.2.10.2  tls sysctl_tcphighwater(SYSCTL_HANDLER_ARGS)
    175  1.2.10.2  tls {
    176  1.2.10.2  tls 	int error, newhighwater;
    177  1.2.10.2  tls 
    178  1.2.10.2  tls 	newhighwater = nfsrc_tcphighwater;	/* hand the current value to sysctl_handle_int() */
    179  1.2.10.2  tls 	error = sysctl_handle_int(oidp, &newhighwater, 0, req);
    180  1.2.10.2  tls 	if (error != 0 || req->newptr == NULL)	/* failure, or no new value supplied */
    181  1.2.10.2  tls 		return (error);
    182  1.2.10.2  tls 	if (newhighwater < 0)	/* negative settings are rejected */
    183  1.2.10.2  tls 		return (EINVAL);
    184  1.2.10.2  tls 	if (newhighwater >= nfsrc_floodlevel)	/* keep the flood level above the high water mark */
    185  1.2.10.2  tls 		nfsrc_floodlevel = newhighwater + newhighwater / 5;	/* NOTE(review): int sum can overflow for very large values */
    186  1.2.10.2  tls 	nfsrc_tcphighwater = newhighwater;
    187  1.2.10.2  tls 	return (0);
    188  1.2.10.2  tls }
    189  1.2.10.2  tls SYSCTL_PROC(_vfs_nfsd, OID_AUTO, tcphighwater, CTLTYPE_UINT | CTLFLAG_RW, 0,
    190  1.2.10.2  tls     sizeof(nfsrc_tcphighwater), sysctl_tcphighwater, "IU",
    191  1.2.10.2  tls     "High water mark for TCP cache entries");
    192  1.2.10.2  tls 
    193  1.2.10.2  tls static u_int	nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER;
    194  1.2.10.2  tls SYSCTL_UINT(_vfs_nfsd, OID_AUTO, udphighwater, CTLFLAG_RW,
    195  1.2.10.2  tls     &nfsrc_udphighwater, 0,
    196  1.2.10.2  tls     "High water mark for UDP cache entries");
    197  1.2.10.2  tls static u_int	nfsrc_tcptimeout = NFSRVCACHE_TCPTIMEOUT;	/* added to NFSD_MONOSEC for TCP rc_timestamp */
    198  1.2.10.2  tls SYSCTL_UINT(_vfs_nfsd, OID_AUTO, tcpcachetimeo, CTLFLAG_RW,
    199  1.2.10.2  tls     &nfsrc_tcptimeout, 0,
    200  1.2.10.2  tls     "Timeout for TCP entries in the DRC");
    201  1.2.10.2  tls static u_int nfsrc_tcpnonidempotent = 1;	/* when 0, nfsrvd_updatecache() does not save ND_SAVEREPLY TCP replies */
    202  1.2.10.2  tls SYSCTL_UINT(_vfs_nfsd, OID_AUTO, cachetcp, CTLFLAG_RW,
    203  1.2.10.2  tls     &nfsrc_tcpnonidempotent, 0,
    204  1.2.10.2  tls     "Enable the DRC for NFS over TCP");
    205  1.2.10.2  tls 
    206  1.2.10.2  tls static int nfsrc_udpcachesize = 0;	/* incremented when nfsrc_getudp() inserts an entry */
    207  1.2.10.2  tls static TAILQ_HEAD(, nfsrvcache) nfsrvudplru;	/* UDP entries; touched entries are moved to the tail */
    208  1.2.10.2  tls static struct nfsrvhashhead nfsrvudphashtbl[NFSRVCACHE_HASHSIZE];	/* UDP hash chains, indexed via NFSRCUDPHASH() */
    209  1.2.10.2  tls 
    210  1.2.10.2  tls /*
    211  1.2.10.2  tls  * Reverse mapping from a generic procedure number (the index) to its Version 2 procedure number
    212  1.2.10.2  tls  */
    213  1.2.10.2  tls static int newnfsv2_procid[NFS_V3NPROCS] = {
    214  1.2.10.2  tls 	NFSV2PROC_NULL,
    215  1.2.10.2  tls 	NFSV2PROC_GETATTR,
    216  1.2.10.2  tls 	NFSV2PROC_SETATTR,
    217  1.2.10.2  tls 	NFSV2PROC_LOOKUP,
    218  1.2.10.2  tls 	NFSV2PROC_NOOP,
    219  1.2.10.2  tls 	NFSV2PROC_READLINK,
    220  1.2.10.2  tls 	NFSV2PROC_READ,
    221  1.2.10.2  tls 	NFSV2PROC_WRITE,
    222  1.2.10.2  tls 	NFSV2PROC_CREATE,
    223  1.2.10.2  tls 	NFSV2PROC_MKDIR,
    224  1.2.10.2  tls 	NFSV2PROC_SYMLINK,
    225  1.2.10.2  tls 	NFSV2PROC_CREATE,
    226  1.2.10.2  tls 	NFSV2PROC_REMOVE,
    227  1.2.10.2  tls 	NFSV2PROC_RMDIR,
    228  1.2.10.2  tls 	NFSV2PROC_RENAME,
    229  1.2.10.2  tls 	NFSV2PROC_LINK,
    230  1.2.10.2  tls 	NFSV2PROC_READDIR,
    231  1.2.10.2  tls 	NFSV2PROC_NOOP,
    232  1.2.10.2  tls 	NFSV2PROC_STATFS,
    233  1.2.10.2  tls 	NFSV2PROC_NOOP,
    234  1.2.10.2  tls 	NFSV2PROC_NOOP,
    235  1.2.10.2  tls 	NFSV2PROC_NOOP,
    236  1.2.10.2  tls };
    237  1.2.10.2  tls 
    238  1.2.10.2  tls #define	nfsrc_hash(xid)	(((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE)	/* xid plus its bits above bit 23, mod table size */
    239  1.2.10.2  tls #define	NFSRCUDPHASH(xid) \
    240  1.2.10.2  tls 	(&nfsrvudphashtbl[nfsrc_hash(xid)])	/* UDP hash chain for xid */
    241  1.2.10.2  tls #define	NFSRCHASH(xid) \
    242  1.2.10.2  tls 	(&nfsrchash_table[nfsrc_hash(xid)].tbl)	/* list head of the nfsrchash_table bucket for xid */
    243  1.2.10.2  tls #define	TRUE	1
    244  1.2.10.2  tls #define	FALSE	0
    245  1.2.10.2  tls #define	NFSRVCACHE_CHECKLEN	100	/* presumably the "first N bytes" that get checksummed -- TODO confirm in nfsrc_getlenandcksum() */
    246  1.2.10.2  tls 
    247  1.2.10.2  tls /* True iff the rpc reply is an nfs status ONLY! Indexed by the Version 2 procedure number from newnfsv2_procid[]. */
    248  1.2.10.2  tls static int nfsv2_repstat[NFS_V3NPROCS] = {
    249  1.2.10.2  tls 	FALSE,
    250  1.2.10.2  tls 	FALSE,
    251  1.2.10.2  tls 	FALSE,
    252  1.2.10.2  tls 	FALSE,
    253  1.2.10.2  tls 	FALSE,
    254  1.2.10.2  tls 	FALSE,
    255  1.2.10.2  tls 	FALSE,
    256  1.2.10.2  tls 	FALSE,
    257  1.2.10.2  tls 	FALSE,
    258  1.2.10.2  tls 	FALSE,
    259  1.2.10.2  tls 	TRUE,
    260  1.2.10.2  tls 	TRUE,
    261  1.2.10.2  tls 	TRUE,
    262  1.2.10.2  tls 	TRUE,
    263  1.2.10.2  tls 	FALSE,
    264  1.2.10.2  tls 	TRUE,
    265  1.2.10.2  tls 	FALSE,
    266  1.2.10.2  tls 	FALSE,
    267  1.2.10.2  tls 	FALSE,
    268  1.2.10.2  tls 	FALSE,
    269  1.2.10.2  tls 	FALSE,
    270  1.2.10.2  tls 	FALSE,
    271  1.2.10.2  tls };
    272  1.2.10.2  tls 
    273  1.2.10.2  tls /*
    274  1.2.10.2  tls  * Address family of a cache entry: AF_INET6 when RC_INETIPV6 is set in rc_flag, else AF_INET.
    275  1.2.10.2  tls  */
    276  1.2.10.2  tls #define	NETFAMILY(rp) \
    277  1.2.10.2  tls 		(((rp)->rc_flag & RC_INETIPV6) ? AF_INET6 : AF_INET)
    278  1.2.10.2  tls 
    279  1.2.10.2  tls /* prototypes for the file-local (static) helper functions */
    280  1.2.10.2  tls static int nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
    281  1.2.10.2  tls static int nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
    282  1.2.10.2  tls static void nfsrc_lock(struct nfsrvcache *rp);
    283  1.2.10.2  tls static void nfsrc_unlock(struct nfsrvcache *rp);
    284  1.2.10.2  tls static void nfsrc_wanted(struct nfsrvcache *rp);
    285  1.2.10.2  tls static void nfsrc_freecache(struct nfsrvcache *rp);
    286  1.2.10.2  tls static void nfsrc_trimcache(u_int64_t, struct socket *);
    287  1.2.10.2  tls static int nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t,
    288  1.2.10.2  tls     struct socket *);
    289  1.2.10.2  tls static int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum);
    290  1.2.10.2  tls static void nfsrc_marksametcpconn(u_int64_t);
    291  1.2.10.2  tls 
    292  1.2.10.2  tls /*
    293  1.2.10.2  tls  * Return the mutex for this cache entry: nfsrc_udpmtx if RC_UDP is set, else the mutex of its xid hash bucket.
    294  1.2.10.2  tls  */
    295  1.2.10.2  tls static __inline struct mtx *
    296  1.2.10.2  tls nfsrc_cachemutex(struct nfsrvcache *rp)
    297  1.2.10.2  tls {
    298  1.2.10.2  tls 
    299  1.2.10.2  tls 	if ((rp->rc_flag & RC_UDP) != 0)
    300  1.2.10.2  tls 		return (&nfsrc_udpmtx);
    301  1.2.10.2  tls 	return (&nfsrchash_table[nfsrc_hash(rp->rc_xid)].mtx);	/* same bucket index that NFSRCHASH() uses */
    302  1.2.10.2  tls }
    303  1.2.10.2  tls 
    304  1.2.10.2  tls /*
    305  1.2.10.2  tls  * Initialize the server request cache lists and counters; only the first call does any work.
    306  1.2.10.2  tls  */
    307  1.2.10.2  tls APPLESTATIC void
    308  1.2.10.2  tls nfsrvd_initcache(void)
    309  1.2.10.2  tls {
    310  1.2.10.2  tls 	int i;
    311  1.2.10.2  tls 	static int inited = 0;	/* set on the first call; later calls return early */
    312  1.2.10.2  tls 
    313  1.2.10.2  tls 	if (inited)
    314  1.2.10.2  tls 		return;
    315  1.2.10.2  tls 	inited = 1;
    316  1.2.10.2  tls 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
    317  1.2.10.2  tls 		LIST_INIT(&nfsrvudphashtbl[i]);	/* UDP hash chain */
    318  1.2.10.2  tls 		LIST_INIT(&nfsrchash_table[i].tbl);	/* chain reached through NFSRCHASH() */
    319  1.2.10.2  tls 	}
    320  1.2.10.2  tls 	TAILQ_INIT(&nfsrvudplru);
    321  1.2.10.2  tls 	nfsrc_tcpsavedreplies = 0;
    322  1.2.10.2  tls 	nfsrc_udpcachesize = 0;
    323  1.2.10.2  tls 	newnfsstats.srvcache_tcppeak = 0;
    324  1.2.10.2  tls 	newnfsstats.srvcache_size = 0;
    325  1.2.10.2  tls }
    326  1.2.10.2  tls 
    327  1.2.10.2  tls /*
    328  1.2.10.2  tls  * Get a cache entry for this request. Allocate and fill in a new entry, then
    329  1.2.10.2  tls  * call nfsrc_getudp() or nfsrc_gettcp() to do the rest; their result is returned.
    330  1.2.10.2  tls  * Call nfsrc_trimcache() to clean up the cache before returning.
    331  1.2.10.2  tls  */
    332  1.2.10.2  tls APPLESTATIC int
    333  1.2.10.2  tls nfsrvd_getcache(struct nfsrv_descript *nd, struct socket *so)
    334  1.2.10.2  tls {
    335  1.2.10.2  tls 	struct nfsrvcache *newrp;
    336  1.2.10.2  tls 	int ret;
    337  1.2.10.2  tls 
    338  1.2.10.2  tls 	if (nd->nd_procnum == NFSPROC_NULL)	/* Null RPCs are expected to bypass the cache */
    339  1.2.10.2  tls 		panic("nfsd cache null");
    340  1.2.10.2  tls 	MALLOC(newrp, struct nfsrvcache *, sizeof (struct nfsrvcache),
    341  1.2.10.2  tls 	    M_NFSRVCACHE, M_WAITOK);
    342  1.2.10.2  tls 	NFSBZERO((caddr_t)newrp, sizeof (struct nfsrvcache));
    343  1.2.10.2  tls 	if (nd->nd_flag & ND_NFSV4)	/* record exactly one NFS version flag */
    344  1.2.10.2  tls 		newrp->rc_flag = RC_NFSV4;
    345  1.2.10.2  tls 	else if (nd->nd_flag & ND_NFSV3)
    346  1.2.10.2  tls 		newrp->rc_flag = RC_NFSV3;
    347  1.2.10.2  tls 	else
    348  1.2.10.2  tls 		newrp->rc_flag = RC_NFSV2;
    349  1.2.10.2  tls 	newrp->rc_xid = nd->nd_retxid;
    350  1.2.10.2  tls 	newrp->rc_proc = nd->nd_procnum;
    351  1.2.10.2  tls 	newrp->rc_sockref = nd->nd_sockref;
    352  1.2.10.2  tls 	newrp->rc_cachetime = nd->nd_tcpconntime;
    353  1.2.10.2  tls 	if (nd->nd_flag & ND_SAMETCPCONN)
    354  1.2.10.2  tls 		newrp->rc_flag |= RC_SAMETCPCONN;
    355  1.2.10.2  tls 	if (nd->nd_nam2 != NULL) {	/* a non-NULL nd_nam2 selects the UDP path */
    356  1.2.10.2  tls 		newrp->rc_flag |= RC_UDP;
    357  1.2.10.2  tls 		ret = nfsrc_getudp(nd, newrp);
    358  1.2.10.2  tls 	} else {
    359  1.2.10.2  tls 		ret = nfsrc_gettcp(nd, newrp);
    360  1.2.10.2  tls 	}
    361  1.2.10.2  tls 	nfsrc_trimcache(nd->nd_sockref, so);
    362  1.2.10.2  tls 	NFSEXITCODE2(0, nd);
    363  1.2.10.2  tls 	return (ret);
    364  1.2.10.2  tls }
    365  1.2.10.2  tls 
    366  1.2.10.2  tls /*
    367  1.2.10.2  tls  * For UDP (v2, v3):
    368  1.2.10.2  tls  * - key on <xid, NFS version, RPC#, Client host ip#>
    369  1.2.10.2  tls  *   (at most one entry for each key); returns RC_DROPIT, RC_REPLY or RC_DOIT
    370  1.2.10.2  tls  */
    371  1.2.10.2  tls static int
    372  1.2.10.2  tls nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
    373  1.2.10.2  tls {
    374  1.2.10.2  tls 	struct nfsrvcache *rp;
    375  1.2.10.2  tls 	struct sockaddr_in *saddr;
    376  1.2.10.2  tls 	struct sockaddr_in6 *saddr6;
    377  1.2.10.2  tls 	struct nfsrvhashhead *hp;
    378  1.2.10.2  tls 	int ret = 0;
    379  1.2.10.2  tls 	struct mtx *mutex;
    380  1.2.10.2  tls 
    381  1.2.10.2  tls 	mutex = nfsrc_cachemutex(newrp);	/* RC_UDP was set by the caller, so this is nfsrc_udpmtx */
    382  1.2.10.2  tls 	hp = NFSRCUDPHASH(newrp->rc_xid);
    383  1.2.10.2  tls loop:
    384  1.2.10.2  tls 	mtx_lock(mutex);
    385  1.2.10.2  tls 	LIST_FOREACH(rp, hp, rc_hash) {
    386  1.2.10.2  tls 	    if (newrp->rc_xid == rp->rc_xid &&
    387  1.2.10.2  tls 		newrp->rc_proc == rp->rc_proc &&
    388  1.2.10.2  tls 		(newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
    389  1.2.10.2  tls 		nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
    390  1.2.10.2  tls 			if ((rp->rc_flag & RC_LOCKED) != 0) {	/* entry is locked: wait and rescan */
    391  1.2.10.2  tls 				rp->rc_flag |= RC_WANTED;
    392  1.2.10.2  tls 				(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
    393  1.2.10.2  tls 				    "nfsrc", 10 * hz);
    394  1.2.10.2  tls 				goto loop;	/* PDROP left the mutex released; loop: re-locks it */
    395  1.2.10.2  tls 			}
    396  1.2.10.2  tls 			if (rp->rc_flag == 0)
    397  1.2.10.2  tls 				panic("nfs udp cache0");
    398  1.2.10.2  tls 			rp->rc_flag |= RC_LOCKED;
    399  1.2.10.2  tls 			TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);	/* move the hit entry to the LRU tail */
    400  1.2.10.2  tls 			TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
    401  1.2.10.2  tls 			if (rp->rc_flag & RC_INPROG) {	/* matching RPC still in progress: drop this request */
    402  1.2.10.2  tls 				newnfsstats.srvcache_inproghits++;
    403  1.2.10.2  tls 				mtx_unlock(mutex);
    404  1.2.10.2  tls 				ret = RC_DROPIT;
    405  1.2.10.2  tls 			} else if (rp->rc_flag & RC_REPSTATUS) {
    406  1.2.10.2  tls 				/*
    407  1.2.10.2  tls 				 * V2 only. Only a status was saved, so rebuild the reply head and store it.
    408  1.2.10.2  tls 				 */
    409  1.2.10.2  tls 				newnfsstats.srvcache_nonidemdonehits++;
    410  1.2.10.2  tls 				mtx_unlock(mutex);
    411  1.2.10.2  tls 				nfsrvd_rephead(nd);
    412  1.2.10.2  tls 				*(nd->nd_errp) = rp->rc_status;
    413  1.2.10.2  tls 				ret = RC_REPLY;
    414  1.2.10.2  tls 				rp->rc_timestamp = NFSD_MONOSEC +
    415  1.2.10.2  tls 					NFSRVCACHE_UDPTIMEOUT;
    416  1.2.10.2  tls 			} else if (rp->rc_flag & RC_REPMBUF) {	/* a full reply was saved: answer with a copy of it */
    417  1.2.10.2  tls 				newnfsstats.srvcache_nonidemdonehits++;
    418  1.2.10.2  tls 				mtx_unlock(mutex);
    419  1.2.10.2  tls 				nd->nd_mreq = m_copym(rp->rc_reply, 0,
    420  1.2.10.2  tls 					M_COPYALL, M_WAITOK);
    421  1.2.10.2  tls 				ret = RC_REPLY;
    422  1.2.10.2  tls 				rp->rc_timestamp = NFSD_MONOSEC +
    423  1.2.10.2  tls 					NFSRVCACHE_UDPTIMEOUT;
    424  1.2.10.2  tls 			} else {
    425  1.2.10.2  tls 				panic("nfs udp cache1");
    426  1.2.10.2  tls 			}
    427  1.2.10.2  tls 			nfsrc_unlock(rp);
    428  1.2.10.2  tls 			free((caddr_t)newrp, M_NFSRVCACHE);	/* on a hit the preallocated entry is not used */
    429  1.2.10.2  tls 			goto out;
    430  1.2.10.2  tls 		}
    431  1.2.10.2  tls 	}
    432  1.2.10.2  tls 	newnfsstats.srvcache_misses++;	/* no match: insert newrp, marked in progress */
    433  1.2.10.2  tls 	atomic_add_int(&newnfsstats.srvcache_size, 1);
    434  1.2.10.2  tls 	nfsrc_udpcachesize++;
    435  1.2.10.2  tls 
    436  1.2.10.2  tls 	newrp->rc_flag |= RC_INPROG;
    437  1.2.10.2  tls 	saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
    438  1.2.10.2  tls 	if (saddr->sin_family == AF_INET)	/* remember the client address for later matching */
    439  1.2.10.2  tls 		newrp->rc_inet = saddr->sin_addr.s_addr;
    440  1.2.10.2  tls 	else if (saddr->sin_family == AF_INET6) {
    441  1.2.10.2  tls 		saddr6 = (struct sockaddr_in6 *)saddr;
    442  1.2.10.2  tls 		NFSBCOPY((caddr_t)&saddr6->sin6_addr, (caddr_t)&newrp->rc_inet6,
    443  1.2.10.2  tls 		    sizeof (struct in6_addr));
    444  1.2.10.2  tls 		newrp->rc_flag |= RC_INETIPV6;
    445  1.2.10.2  tls 	}
    446  1.2.10.2  tls 	LIST_INSERT_HEAD(hp, newrp, rc_hash);
    447  1.2.10.2  tls 	TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru);
    448  1.2.10.2  tls 	mtx_unlock(mutex);
    449  1.2.10.2  tls 	nd->nd_rp = newrp;	/* nfsrvd_updatecache() picks the entry up from nd->nd_rp */
    450  1.2.10.2  tls 	ret = RC_DOIT;
    451  1.2.10.2  tls 
    452  1.2.10.2  tls out:
    453  1.2.10.2  tls 	NFSEXITCODE2(0, nd);
    454  1.2.10.2  tls 	return (ret);
    455  1.2.10.2  tls }
    456  1.2.10.2  tls 
    457  1.2.10.2  tls /*
    458  1.2.10.2  tls  * Update the request cache entry nd->nd_rp after the rpc has been done; returns non-NULL only for a saved TCP entry with rc_refcnt == 0.
    459  1.2.10.2  tls  */
    460  1.2.10.2  tls APPLESTATIC struct nfsrvcache *
    461  1.2.10.2  tls nfsrvd_updatecache(struct nfsrv_descript *nd, struct socket *so)
    462  1.2.10.2  tls {
    463  1.2.10.2  tls 	struct nfsrvcache *rp;
    464  1.2.10.2  tls 	struct nfsrvcache *retrp = NULL;
    465  1.2.10.2  tls 	mbuf_t m;
    466  1.2.10.2  tls 	struct mtx *mutex;
    467  1.2.10.2  tls 
    468  1.2.10.2  tls 	rp = nd->nd_rp;
    469  1.2.10.2  tls 	if (!rp)
    470  1.2.10.2  tls 		panic("nfsrvd_updatecache null rp");
    471  1.2.10.2  tls 	nd->nd_rp = NULL;	/* the descriptor no longer refers to the entry */
    472  1.2.10.2  tls 	mutex = nfsrc_cachemutex(rp);
    473  1.2.10.2  tls 	mtx_lock(mutex);
    474  1.2.10.2  tls 	nfsrc_lock(rp);	/* presumably sets RC_LOCKED (nfsrvd_sentcache() checks it) -- defined outside this view */
    475  1.2.10.2  tls 	if (!(rp->rc_flag & RC_INPROG))
    476  1.2.10.2  tls 		panic("nfsrvd_updatecache not inprog");
    477  1.2.10.2  tls 	rp->rc_flag &= ~RC_INPROG;
    478  1.2.10.2  tls 	if (rp->rc_flag & RC_UDP) {	/* UDP entries move to the LRU tail */
    479  1.2.10.2  tls 		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
    480  1.2.10.2  tls 		TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
    481  1.2.10.2  tls 	}
    482  1.2.10.2  tls 
    483  1.2.10.2  tls 	/*
    484  1.2.10.2  tls 	 * Reply from cache is a special case returned by nfsrv_checkseqid().
    485  1.2.10.2  tls 	 */
    486  1.2.10.2  tls 	if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) {
    487  1.2.10.2  tls 		newnfsstats.srvcache_nonidemdonehits++;
    488  1.2.10.2  tls 		mtx_unlock(mutex);
    489  1.2.10.2  tls 		nd->nd_repstat = 0;
    490  1.2.10.2  tls 		if (nd->nd_mreq)	/* discard the reply built for this request */
    491  1.2.10.2  tls 			mbuf_freem(nd->nd_mreq);
    492  1.2.10.2  tls 		if (!(rp->rc_flag & RC_REPMBUF))
    493  1.2.10.2  tls 			panic("reply from cache");
    494  1.2.10.2  tls 		nd->nd_mreq = m_copym(rp->rc_reply, 0,
    495  1.2.10.2  tls 		    M_COPYALL, M_WAITOK);
    496  1.2.10.2  tls 		rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
    497  1.2.10.2  tls 		nfsrc_unlock(rp);
    498  1.2.10.2  tls 		goto out;
    499  1.2.10.2  tls 	}
    500  1.2.10.2  tls 
    501  1.2.10.2  tls 	/*
    502  1.2.10.2  tls 	 * If rc_refcnt > 0, save it
    503  1.2.10.2  tls 	 * For UDP, save it if ND_SAVEREPLY is set
    504  1.2.10.2  tls 	 * For TCP, save it if ND_SAVEREPLY and nfsrc_tcpnonidempotent is set (and the flood level is not exceeded)
    505  1.2.10.2  tls 	 */
    506  1.2.10.2  tls 	if (nd->nd_repstat != NFSERR_DONTREPLY &&
    507  1.2.10.2  tls 	    (rp->rc_refcnt > 0 ||
    508  1.2.10.2  tls 	     ((nd->nd_flag & ND_SAVEREPLY) && (rp->rc_flag & RC_UDP)) ||
    509  1.2.10.2  tls 	     ((nd->nd_flag & ND_SAVEREPLY) && !(rp->rc_flag & RC_UDP) &&
    510  1.2.10.2  tls 	      nfsrc_tcpsavedreplies <= nfsrc_floodlevel &&
    511  1.2.10.2  tls 	      nfsrc_tcpnonidempotent))) {
    512  1.2.10.2  tls 		if (rp->rc_refcnt > 0) {
    513  1.2.10.2  tls 			if (!(rp->rc_flag & RC_NFSV4))	/* a reference count is only expected on NFSv4 entries */
    514  1.2.10.2  tls 				panic("update_cache refcnt");
    515  1.2.10.2  tls 			rp->rc_flag |= RC_REFCNT;
    516  1.2.10.2  tls 		}
    517  1.2.10.2  tls 		if ((nd->nd_flag & ND_NFSV2) &&
    518  1.2.10.2  tls 		    nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) {
    519  1.2.10.2  tls 			rp->rc_status = nd->nd_repstat;	/* status-only V2 reply: save just the status */
    520  1.2.10.2  tls 			rp->rc_flag |= RC_REPSTATUS;
    521  1.2.10.2  tls 			mtx_unlock(mutex);
    522  1.2.10.2  tls 		} else {
    523  1.2.10.2  tls 			if (!(rp->rc_flag & RC_UDP)) {
    524  1.2.10.2  tls 			    atomic_add_int(&nfsrc_tcpsavedreplies, 1);
    525  1.2.10.2  tls 			    if (nfsrc_tcpsavedreplies >
    526  1.2.10.2  tls 				newnfsstats.srvcache_tcppeak)
    527  1.2.10.2  tls 				newnfsstats.srvcache_tcppeak =
    528  1.2.10.2  tls 				    nfsrc_tcpsavedreplies;
    529  1.2.10.2  tls 			}
    530  1.2.10.2  tls 			mtx_unlock(mutex);	/* mutex is not held across the M_WAITOK copy */
    531  1.2.10.2  tls 			m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
    532  1.2.10.2  tls 			mtx_lock(mutex);
    533  1.2.10.2  tls 			rp->rc_reply = m;
    534  1.2.10.2  tls 			rp->rc_flag |= RC_REPMBUF;
    535  1.2.10.2  tls 			mtx_unlock(mutex);
    536  1.2.10.2  tls 		}
    537  1.2.10.2  tls 		if (rp->rc_flag & RC_UDP) {
    538  1.2.10.2  tls 			rp->rc_timestamp = NFSD_MONOSEC +
    539  1.2.10.2  tls 			    NFSRVCACHE_UDPTIMEOUT;
    540  1.2.10.2  tls 			nfsrc_unlock(rp);
    541  1.2.10.2  tls 		} else {
    542  1.2.10.2  tls 			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
    543  1.2.10.2  tls 			if (rp->rc_refcnt > 0)
    544  1.2.10.2  tls 				nfsrc_unlock(rp);
    545  1.2.10.2  tls 			else
    546  1.2.10.2  tls 				retrp = rp;	/* not unlocked here; nfsrvd_sentcache() unlocks it */
    547  1.2.10.2  tls 		}
    548  1.2.10.2  tls 	} else {
    549  1.2.10.2  tls 		nfsrc_freecache(rp);	/* reply is not being saved */
    550  1.2.10.2  tls 		mtx_unlock(mutex);
    551  1.2.10.2  tls 	}
    552  1.2.10.2  tls 
    553  1.2.10.2  tls out:
    554  1.2.10.2  tls 	nfsrc_trimcache(nd->nd_sockref, so);
    555  1.2.10.2  tls 	NFSEXITCODE2(0, nd);
    556  1.2.10.2  tls 	return (retrp);
    557  1.2.10.2  tls }
    558  1.2.10.2  tls 
    559  1.2.10.2  tls /*
    560  1.2.10.2  tls  * Invalidate (clear RC_INPROG) and, if unreferenced and unlocked, free an in prog cache entry.
    561  1.2.10.2  tls  * Must not sleep.
    562  1.2.10.2  tls  */
    563  1.2.10.2  tls APPLESTATIC void
    564  1.2.10.2  tls nfsrvd_delcache(struct nfsrvcache *rp)
    565  1.2.10.2  tls {
    566  1.2.10.2  tls 	struct mtx *mutex;
    567  1.2.10.2  tls 
    568  1.2.10.2  tls 	mutex = nfsrc_cachemutex(rp);
    569  1.2.10.2  tls 	if (!(rp->rc_flag & RC_INPROG))	/* NOTE(review): rc_flag is tested before the mutex is taken */
    570  1.2.10.2  tls 		panic("nfsrvd_delcache not in prog");
    571  1.2.10.2  tls 	mtx_lock(mutex);
    572  1.2.10.2  tls 	rp->rc_flag &= ~RC_INPROG;
    573  1.2.10.2  tls 	if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED))	/* otherwise only RC_INPROG is cleared here */
    574  1.2.10.2  tls 		nfsrc_freecache(rp);
    575  1.2.10.2  tls 	mtx_unlock(mutex);
    576  1.2.10.2  tls }
    577  1.2.10.2  tls 
    578  1.2.10.2  tls /*
    579  1.2.10.2  tls  * Called after nfsrvd_updatecache() once the reply is sent, to update
    580  1.2.10.2  tls  * the entry for nfsrc_activesocket() and unlock it. The argument is
    581  1.2.10.2  tls  * the pointer returned by nfsrvd_updatecache(); it must still have RC_LOCKED set.
    582  1.2.10.2  tls  */
    583  1.2.10.2  tls APPLESTATIC void
    584  1.2.10.2  tls nfsrvd_sentcache(struct nfsrvcache *rp, struct socket *so, int err)
    585  1.2.10.2  tls {
    586  1.2.10.2  tls 	tcp_seq tmp_seq;
    587  1.2.10.2  tls 	struct mtx *mutex;
    588  1.2.10.2  tls 
    589  1.2.10.2  tls 	mutex = nfsrc_cachemutex(rp);
    590  1.2.10.2  tls 	if (!(rp->rc_flag & RC_LOCKED))
    591  1.2.10.2  tls 		panic("nfsrvd_sentcache not locked");
    592  1.2.10.2  tls 	if (!err) {	/* err presumably is the send result; the sequence number is recorded only when it is 0 */
    593  1.2.10.2  tls 		if ((so->so_proto->pr_domain->dom_family != AF_INET &&
    594  1.2.10.2  tls 		     so->so_proto->pr_domain->dom_family != AF_INET6) ||
    595  1.2.10.2  tls 		     so->so_proto->pr_protocol != IPPROTO_TCP)	/* only TCP over IPv4/IPv6 sockets are accepted */
    596  1.2.10.2  tls 			panic("nfs sent cache");
    597  1.2.10.2  tls 		if (nfsrv_getsockseqnum(so, &tmp_seq)) {	/* nonzero return: store tmp_seq in the entry */
    598  1.2.10.2  tls 			mtx_lock(mutex);
    599  1.2.10.2  tls 			rp->rc_tcpseq = tmp_seq;
    600  1.2.10.2  tls 			rp->rc_flag |= RC_TCPSEQ;
    601  1.2.10.2  tls 			mtx_unlock(mutex);
    602  1.2.10.2  tls 		}
    603  1.2.10.2  tls 	}
    604  1.2.10.2  tls 	nfsrc_unlock(rp);
    605  1.2.10.2  tls }
    606  1.2.10.2  tls 
    607  1.2.10.2  tls /*
    608  1.2.10.2  tls  * Get a cache entry for TCP
    609  1.2.10.2  tls  * - key on <xid, nfs version>
    610  1.2.10.2  tls  *   (allow multiple entries for a given key)
    611  1.2.10.2  tls  */
    612  1.2.10.2  tls static int
    613  1.2.10.2  tls nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
    614  1.2.10.2  tls {
    615  1.2.10.2  tls 	struct nfsrvcache *rp, *nextrp;
    616  1.2.10.2  tls 	int i;
    617  1.2.10.2  tls 	struct nfsrvcache *hitrp;
    618  1.2.10.2  tls 	struct nfsrvhashhead *hp, nfsrc_templist;
    619  1.2.10.2  tls 	int hit, ret = 0;
    620  1.2.10.2  tls 	struct mtx *mutex;
    621  1.2.10.2  tls 
    622  1.2.10.2  tls 	mutex = nfsrc_cachemutex(newrp);
    623  1.2.10.2  tls 	hp = NFSRCHASH(newrp->rc_xid);
    624  1.2.10.2  tls 	newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum);
    625  1.2.10.2  tls tryagain:
    626  1.2.10.2  tls 	mtx_lock(mutex);
    627  1.2.10.2  tls 	hit = 1;
    628  1.2.10.2  tls 	LIST_INIT(&nfsrc_templist);
    629  1.2.10.2  tls 	/*
    630  1.2.10.2  tls 	 * Get all the matches and put them on the temp list.
    631  1.2.10.2  tls 	 */
    632  1.2.10.2  tls 	rp = LIST_FIRST(hp);
    633  1.2.10.2  tls 	while (rp != NULL) {
    634  1.2.10.2  tls 		nextrp = LIST_NEXT(rp, rc_hash);
    635  1.2.10.2  tls 		if (newrp->rc_xid == rp->rc_xid &&
    636  1.2.10.2  tls 		    (!(rp->rc_flag & RC_INPROG) ||
    637  1.2.10.2  tls 		     ((newrp->rc_flag & RC_SAMETCPCONN) &&
    638  1.2.10.2  tls 		      newrp->rc_sockref == rp->rc_sockref)) &&
    639  1.2.10.2  tls 		    (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
    640  1.2.10.2  tls 		    newrp->rc_proc == rp->rc_proc &&
    641  1.2.10.2  tls 		    ((newrp->rc_flag & RC_NFSV4) &&
    642  1.2.10.2  tls 		     newrp->rc_sockref != rp->rc_sockref &&
    643  1.2.10.2  tls 		     newrp->rc_cachetime >= rp->rc_cachetime)
    644  1.2.10.2  tls 		    && newrp->rc_reqlen == rp->rc_reqlen &&
    645  1.2.10.2  tls 		    newrp->rc_cksum == rp->rc_cksum) {
    646  1.2.10.2  tls 			LIST_REMOVE(rp, rc_hash);
    647  1.2.10.2  tls 			LIST_INSERT_HEAD(&nfsrc_templist, rp, rc_hash);
    648  1.2.10.2  tls 		}
    649  1.2.10.2  tls 		rp = nextrp;
    650  1.2.10.2  tls 	}
    651  1.2.10.2  tls 
    652  1.2.10.2  tls 	/*
    653  1.2.10.2  tls 	 * Now, use nfsrc_templist to decide if there is a match.
    654  1.2.10.2  tls 	 */
    655  1.2.10.2  tls 	i = 0;
    656  1.2.10.2  tls 	LIST_FOREACH(rp, &nfsrc_templist, rc_hash) {
    657  1.2.10.2  tls 		i++;
    658  1.2.10.2  tls 		if (rp->rc_refcnt > 0) {
    659  1.2.10.2  tls 			hit = 0;
    660  1.2.10.2  tls 			break;
    661  1.2.10.2  tls 		}
    662  1.2.10.2  tls 	}
    663  1.2.10.2  tls 	/*
    664  1.2.10.2  tls 	 * Can be a hit only if one entry left.
    665  1.2.10.2  tls 	 * Note possible hit entry and put nfsrc_templist back on hash
    666  1.2.10.2  tls 	 * list.
    667  1.2.10.2  tls 	 */
    668  1.2.10.2  tls 	if (i != 1)
    669  1.2.10.2  tls 		hit = 0;
    670  1.2.10.2  tls 	hitrp = rp = LIST_FIRST(&nfsrc_templist);
    671  1.2.10.2  tls 	while (rp != NULL) {
    672  1.2.10.2  tls 		nextrp = LIST_NEXT(rp, rc_hash);
    673  1.2.10.2  tls 		LIST_REMOVE(rp, rc_hash);
    674  1.2.10.2  tls 		LIST_INSERT_HEAD(hp, rp, rc_hash);
    675  1.2.10.2  tls 		rp = nextrp;
    676  1.2.10.2  tls 	}
    677  1.2.10.2  tls 	if (LIST_FIRST(&nfsrc_templist) != NULL)
    678  1.2.10.2  tls 		panic("nfs gettcp cache templist");
    679  1.2.10.2  tls 
    680  1.2.10.2  tls 	if (hit) {
    681  1.2.10.2  tls 		rp = hitrp;
    682  1.2.10.2  tls 		if ((rp->rc_flag & RC_LOCKED) != 0) {
    683  1.2.10.2  tls 			rp->rc_flag |= RC_WANTED;
    684  1.2.10.2  tls 			(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
    685  1.2.10.2  tls 			    "nfsrc", 10 * hz);
    686  1.2.10.2  tls 			goto tryagain;
    687  1.2.10.2  tls 		}
    688  1.2.10.2  tls 		if (rp->rc_flag == 0)
    689  1.2.10.2  tls 			panic("nfs tcp cache0");
    690  1.2.10.2  tls 		rp->rc_flag |= RC_LOCKED;
    691  1.2.10.2  tls 		if (rp->rc_flag & RC_INPROG) {
    692  1.2.10.2  tls 			newnfsstats.srvcache_inproghits++;
    693  1.2.10.2  tls 			mtx_unlock(mutex);
    694  1.2.10.2  tls 			if (newrp->rc_sockref == rp->rc_sockref)
    695  1.2.10.2  tls 				nfsrc_marksametcpconn(rp->rc_sockref);
    696  1.2.10.2  tls 			ret = RC_DROPIT;
    697  1.2.10.2  tls 		} else if (rp->rc_flag & RC_REPSTATUS) {
    698  1.2.10.2  tls 			/*
    699  1.2.10.2  tls 			 * V2 only.
    700  1.2.10.2  tls 			 */
    701  1.2.10.2  tls 			newnfsstats.srvcache_nonidemdonehits++;
    702  1.2.10.2  tls 			mtx_unlock(mutex);
    703  1.2.10.2  tls 			if (newrp->rc_sockref == rp->rc_sockref)
    704  1.2.10.2  tls 				nfsrc_marksametcpconn(rp->rc_sockref);
    705  1.2.10.2  tls 			ret = RC_REPLY;
    706  1.2.10.2  tls 			nfsrvd_rephead(nd);
    707  1.2.10.2  tls 			*(nd->nd_errp) = rp->rc_status;
    708  1.2.10.2  tls 			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
    709  1.2.10.2  tls 		} else if (rp->rc_flag & RC_REPMBUF) {
    710  1.2.10.2  tls 			newnfsstats.srvcache_nonidemdonehits++;
    711  1.2.10.2  tls 			mtx_unlock(mutex);
    712  1.2.10.2  tls 			if (newrp->rc_sockref == rp->rc_sockref)
    713  1.2.10.2  tls 				nfsrc_marksametcpconn(rp->rc_sockref);
    714  1.2.10.2  tls 			ret = RC_REPLY;
    715  1.2.10.2  tls 			nd->nd_mreq = m_copym(rp->rc_reply, 0,
    716  1.2.10.2  tls 				M_COPYALL, M_WAITOK);
    717  1.2.10.2  tls 			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
    718  1.2.10.2  tls 		} else {
    719  1.2.10.2  tls 			panic("nfs tcp cache1");
    720  1.2.10.2  tls 		}
    721  1.2.10.2  tls 		nfsrc_unlock(rp);
    722  1.2.10.2  tls 		free((caddr_t)newrp, M_NFSRVCACHE);
    723  1.2.10.2  tls 		goto out;
    724  1.2.10.2  tls 	}
    725  1.2.10.2  tls 	newnfsstats.srvcache_misses++;
    726  1.2.10.2  tls 	atomic_add_int(&newnfsstats.srvcache_size, 1);
    727  1.2.10.2  tls 
    728  1.2.10.2  tls 	/*
    729  1.2.10.2  tls 	 * For TCP, multiple entries for a key are allowed, so don't
    730  1.2.10.2  tls 	 * chain it into the hash table until done.
    731  1.2.10.2  tls 	 */
    732  1.2.10.2  tls 	newrp->rc_cachetime = NFSD_MONOSEC;
    733  1.2.10.2  tls 	newrp->rc_flag |= RC_INPROG;
    734  1.2.10.2  tls 	LIST_INSERT_HEAD(hp, newrp, rc_hash);
    735  1.2.10.2  tls 	mtx_unlock(mutex);
    736  1.2.10.2  tls 	nd->nd_rp = newrp;
    737  1.2.10.2  tls 	ret = RC_DOIT;
    738  1.2.10.2  tls 
    739  1.2.10.2  tls out:
    740  1.2.10.2  tls 	NFSEXITCODE2(0, nd);
    741  1.2.10.2  tls 	return (ret);
    742  1.2.10.2  tls }
    743  1.2.10.2  tls 
    744  1.2.10.2  tls /*
    745  1.2.10.2  tls  * Lock a cache entry.
    746  1.2.10.2  tls  */
    747  1.2.10.2  tls static void
    748  1.2.10.2  tls nfsrc_lock(struct nfsrvcache *rp)
    749  1.2.10.2  tls {
    750  1.2.10.2  tls 	struct mtx *mutex;
    751  1.2.10.2  tls 
    752  1.2.10.2  tls 	mutex = nfsrc_cachemutex(rp);
    753  1.2.10.2  tls 	mtx_assert(mutex, MA_OWNED);
    754  1.2.10.2  tls 	while ((rp->rc_flag & RC_LOCKED) != 0) {
    755  1.2.10.2  tls 		rp->rc_flag |= RC_WANTED;
    756  1.2.10.2  tls 		(void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0);
    757  1.2.10.2  tls 	}
    758  1.2.10.2  tls 	rp->rc_flag |= RC_LOCKED;
    759  1.2.10.2  tls }
    760  1.2.10.2  tls 
    761  1.2.10.2  tls /*
    762  1.2.10.2  tls  * Unlock a cache entry.
    763  1.2.10.2  tls  */
    764  1.2.10.2  tls static void
    765  1.2.10.2  tls nfsrc_unlock(struct nfsrvcache *rp)
    766  1.2.10.2  tls {
    767  1.2.10.2  tls 	struct mtx *mutex;
    768  1.2.10.2  tls 
    769  1.2.10.2  tls 	mutex = nfsrc_cachemutex(rp);
    770  1.2.10.2  tls 	mtx_lock(mutex);
    771  1.2.10.2  tls 	rp->rc_flag &= ~RC_LOCKED;
    772  1.2.10.2  tls 	nfsrc_wanted(rp);
    773  1.2.10.2  tls 	mtx_unlock(mutex);
    774  1.2.10.2  tls }
    775  1.2.10.2  tls 
    776  1.2.10.2  tls /*
    777  1.2.10.2  tls  * Wakeup anyone wanting entry.
    778  1.2.10.2  tls  */
    779  1.2.10.2  tls static void
    780  1.2.10.2  tls nfsrc_wanted(struct nfsrvcache *rp)
    781  1.2.10.2  tls {
    782  1.2.10.2  tls 	if (rp->rc_flag & RC_WANTED) {
    783  1.2.10.2  tls 		rp->rc_flag &= ~RC_WANTED;
    784  1.2.10.2  tls 		wakeup((caddr_t)rp);
    785  1.2.10.2  tls 	}
    786  1.2.10.2  tls }
    787  1.2.10.2  tls 
    788  1.2.10.2  tls /*
    789  1.2.10.2  tls  * Free up the entry.
    790  1.2.10.2  tls  * Must not sleep.
    791  1.2.10.2  tls  */
    792  1.2.10.2  tls static void
    793  1.2.10.2  tls nfsrc_freecache(struct nfsrvcache *rp)
    794  1.2.10.2  tls {
    795  1.2.10.2  tls 
    796  1.2.10.2  tls 	LIST_REMOVE(rp, rc_hash);
    797  1.2.10.2  tls 	if (rp->rc_flag & RC_UDP) {
    798  1.2.10.2  tls 		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
    799  1.2.10.2  tls 		nfsrc_udpcachesize--;
    800  1.2.10.2  tls 	}
    801  1.2.10.2  tls 	nfsrc_wanted(rp);
    802  1.2.10.2  tls 	if (rp->rc_flag & RC_REPMBUF) {
    803  1.2.10.2  tls 		mbuf_freem(rp->rc_reply);
    804  1.2.10.2  tls 		if (!(rp->rc_flag & RC_UDP))
    805  1.2.10.2  tls 			atomic_add_int(&nfsrc_tcpsavedreplies, -1);
    806  1.2.10.2  tls 	}
    807  1.2.10.2  tls 	FREE((caddr_t)rp, M_NFSRVCACHE);
    808  1.2.10.2  tls 	atomic_add_int(&newnfsstats.srvcache_size, -1);
    809  1.2.10.2  tls }
    810  1.2.10.2  tls 
    811  1.2.10.2  tls /*
    812  1.2.10.2  tls  * Clean out the cache. Called when nfsserver module is unloaded.
    813  1.2.10.2  tls  */
    814  1.2.10.2  tls APPLESTATIC void
    815  1.2.10.2  tls nfsrvd_cleancache(void)
    816  1.2.10.2  tls {
    817  1.2.10.2  tls 	struct nfsrvcache *rp, *nextrp;
    818  1.2.10.2  tls 	int i;
    819  1.2.10.2  tls 
    820  1.2.10.2  tls 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
    821  1.2.10.2  tls 		mtx_lock(&nfsrchash_table[i].mtx);
    822  1.2.10.2  tls 		LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, nextrp)
    823  1.2.10.2  tls 			nfsrc_freecache(rp);
    824  1.2.10.2  tls 		mtx_unlock(&nfsrchash_table[i].mtx);
    825  1.2.10.2  tls 	}
    826  1.2.10.2  tls 	mtx_lock(&nfsrc_udpmtx);
    827  1.2.10.2  tls 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
    828  1.2.10.2  tls 		LIST_FOREACH_SAFE(rp, &nfsrvudphashtbl[i], rc_hash, nextrp) {
    829  1.2.10.2  tls 			nfsrc_freecache(rp);
    830  1.2.10.2  tls 		}
    831  1.2.10.2  tls 	}
    832  1.2.10.2  tls 	newnfsstats.srvcache_size = 0;
    833  1.2.10.2  tls 	mtx_unlock(&nfsrc_udpmtx);
    834  1.2.10.2  tls 	nfsrc_tcpsavedreplies = 0;
    835  1.2.10.2  tls }
    836  1.2.10.2  tls 
    837  1.2.10.2  tls /*
    838  1.2.10.2  tls  * The basic rule is to get rid of entries that are expired.
    839  1.2.10.2  tls  */
    840  1.2.10.2  tls static void
    841  1.2.10.2  tls nfsrc_trimcache(u_int64_t sockref, struct socket *so)
    842  1.2.10.2  tls {
    843  1.2.10.2  tls 	struct nfsrvcache *rp, *nextrp;
    844  1.2.10.2  tls 	int i, j, k, time_histo[10];
    845  1.2.10.2  tls 	time_t thisstamp;
    846  1.2.10.2  tls 	static time_t udp_lasttrim = 0, tcp_lasttrim = 0;
    847  1.2.10.2  tls 	static int onethread = 0;
    848  1.2.10.2  tls 
    849  1.2.10.2  tls 	if (atomic_cmpset_acq_int(&onethread, 0, 1) == 0)
    850  1.2.10.2  tls 		return;
    851  1.2.10.2  tls 	if (NFSD_MONOSEC != udp_lasttrim ||
    852  1.2.10.2  tls 	    nfsrc_udpcachesize >= (nfsrc_udphighwater +
    853  1.2.10.2  tls 	    nfsrc_udphighwater / 2)) {
    854  1.2.10.2  tls 		mtx_lock(&nfsrc_udpmtx);
    855  1.2.10.2  tls 		udp_lasttrim = NFSD_MONOSEC;
    856  1.2.10.2  tls 		TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) {
    857  1.2.10.2  tls 			if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
    858  1.2.10.2  tls 			     && rp->rc_refcnt == 0
    859  1.2.10.2  tls 			     && ((rp->rc_flag & RC_REFCNT) ||
    860  1.2.10.2  tls 				 udp_lasttrim > rp->rc_timestamp ||
    861  1.2.10.2  tls 				 nfsrc_udpcachesize > nfsrc_udphighwater))
    862  1.2.10.2  tls 				nfsrc_freecache(rp);
    863  1.2.10.2  tls 		}
    864  1.2.10.2  tls 		mtx_unlock(&nfsrc_udpmtx);
    865  1.2.10.2  tls 	}
    866  1.2.10.2  tls 	if (NFSD_MONOSEC != tcp_lasttrim ||
    867  1.2.10.2  tls 	    nfsrc_tcpsavedreplies >= nfsrc_tcphighwater) {
    868  1.2.10.2  tls 		for (i = 0; i < 10; i++)
    869  1.2.10.2  tls 			time_histo[i] = 0;
    870  1.2.10.2  tls 		for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
    871  1.2.10.2  tls 			mtx_lock(&nfsrchash_table[i].mtx);
    872  1.2.10.2  tls 			if (i == 0)
    873  1.2.10.2  tls 				tcp_lasttrim = NFSD_MONOSEC;
    874  1.2.10.2  tls 			LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash,
    875  1.2.10.2  tls 			    nextrp) {
    876  1.2.10.2  tls 				if (!(rp->rc_flag &
    877  1.2.10.2  tls 				     (RC_INPROG|RC_LOCKED|RC_WANTED))
    878  1.2.10.2  tls 				     && rp->rc_refcnt == 0) {
    879  1.2.10.2  tls 					/*
    880  1.2.10.2  tls 					 * The timestamps range from roughly the
    881  1.2.10.2  tls 					 * present (tcp_lasttrim) to the present
    882  1.2.10.2  tls 					 * + nfsrc_tcptimeout. Generate a simple
    883  1.2.10.2  tls 					 * histogram of where the timeouts fall.
    884  1.2.10.2  tls 					 */
    885  1.2.10.2  tls 					j = rp->rc_timestamp - tcp_lasttrim;
    886  1.2.10.2  tls 					if (j >= nfsrc_tcptimeout)
    887  1.2.10.2  tls 						j = nfsrc_tcptimeout - 1;
    888  1.2.10.2  tls 					if (j < 0)
    889  1.2.10.2  tls 						j = 0;
    890  1.2.10.2  tls 					j = (j * 10 / nfsrc_tcptimeout) % 10;
    891  1.2.10.2  tls 					time_histo[j]++;
    892  1.2.10.2  tls 					if ((rp->rc_flag & RC_REFCNT) ||
    893  1.2.10.2  tls 					    tcp_lasttrim > rp->rc_timestamp ||
    894  1.2.10.2  tls 					    nfsrc_activesocket(rp, sockref, so))
    895  1.2.10.2  tls 						nfsrc_freecache(rp);
    896  1.2.10.2  tls 				}
    897  1.2.10.2  tls 			}
    898  1.2.10.2  tls 			mtx_unlock(&nfsrchash_table[i].mtx);
    899  1.2.10.2  tls 		}
    900  1.2.10.2  tls 		j = nfsrc_tcphighwater / 5;	/* 20% of it */
    901  1.2.10.2  tls 		if (j > 0 && (nfsrc_tcpsavedreplies + j) > nfsrc_tcphighwater) {
    902  1.2.10.2  tls 			/*
    903  1.2.10.2  tls 			 * Trim some more with a smaller timeout of as little
    904  1.2.10.2  tls 			 * as 20% of nfsrc_tcptimeout to try and get below
    905  1.2.10.2  tls 			 * 80% of the nfsrc_tcphighwater.
    906  1.2.10.2  tls 			 */
    907  1.2.10.2  tls 			k = 0;
    908  1.2.10.2  tls 			for (i = 0; i < 8; i++) {
    909  1.2.10.2  tls 				k += time_histo[i];
    910  1.2.10.2  tls 				if (k > j)
    911  1.2.10.2  tls 					break;
    912  1.2.10.2  tls 			}
    913  1.2.10.2  tls 			k = nfsrc_tcptimeout * (i + 1) / 10;
    914  1.2.10.2  tls 			if (k < 1)
    915  1.2.10.2  tls 				k = 1;
    916  1.2.10.2  tls 			thisstamp = tcp_lasttrim + k;
    917  1.2.10.2  tls 			for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
    918  1.2.10.2  tls 				mtx_lock(&nfsrchash_table[i].mtx);
    919  1.2.10.2  tls 				LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl,
    920  1.2.10.2  tls 				    rc_hash, nextrp) {
    921  1.2.10.2  tls 					if (!(rp->rc_flag &
    922  1.2.10.2  tls 					     (RC_INPROG|RC_LOCKED|RC_WANTED))
    923  1.2.10.2  tls 					     && rp->rc_refcnt == 0
    924  1.2.10.2  tls 					     && ((rp->rc_flag & RC_REFCNT) ||
    925  1.2.10.2  tls 						 thisstamp > rp->rc_timestamp ||
    926  1.2.10.2  tls 						 nfsrc_activesocket(rp, sockref,
    927  1.2.10.2  tls 						    so)))
    928  1.2.10.2  tls 						nfsrc_freecache(rp);
    929  1.2.10.2  tls 				}
    930  1.2.10.2  tls 				mtx_unlock(&nfsrchash_table[i].mtx);
    931  1.2.10.2  tls 			}
    932  1.2.10.2  tls 		}
    933  1.2.10.2  tls 	}
    934  1.2.10.2  tls 	atomic_store_rel_int(&onethread, 0);
    935  1.2.10.2  tls }
    936  1.2.10.2  tls 
    937  1.2.10.2  tls /*
    938  1.2.10.2  tls  * Add a seqid# reference to the cache entry.
    939  1.2.10.2  tls  */
    940  1.2.10.2  tls APPLESTATIC void
    941  1.2.10.2  tls nfsrvd_refcache(struct nfsrvcache *rp)
    942  1.2.10.2  tls {
    943  1.2.10.2  tls 	struct mtx *mutex;
    944  1.2.10.2  tls 
    945  1.2.10.2  tls 	mutex = nfsrc_cachemutex(rp);
    946  1.2.10.2  tls 	mtx_lock(mutex);
    947  1.2.10.2  tls 	if (rp->rc_refcnt < 0)
    948  1.2.10.2  tls 		panic("nfs cache refcnt");
    949  1.2.10.2  tls 	rp->rc_refcnt++;
    950  1.2.10.2  tls 	mtx_unlock(mutex);
    951  1.2.10.2  tls }
    952  1.2.10.2  tls 
    953  1.2.10.2  tls /*
    954  1.2.10.2  tls  * Dereference a seqid# cache entry.
    955  1.2.10.2  tls  */
    956  1.2.10.2  tls APPLESTATIC void
    957  1.2.10.2  tls nfsrvd_derefcache(struct nfsrvcache *rp)
    958  1.2.10.2  tls {
    959  1.2.10.2  tls 	struct mtx *mutex;
    960  1.2.10.2  tls 
    961  1.2.10.2  tls 	mutex = nfsrc_cachemutex(rp);
    962  1.2.10.2  tls 	mtx_lock(mutex);
    963  1.2.10.2  tls 	if (rp->rc_refcnt <= 0)
    964  1.2.10.2  tls 		panic("nfs cache derefcnt");
    965  1.2.10.2  tls 	rp->rc_refcnt--;
    966  1.2.10.2  tls 	if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG)))
    967  1.2.10.2  tls 		nfsrc_freecache(rp);
    968  1.2.10.2  tls 	mtx_unlock(mutex);
    969  1.2.10.2  tls }
    970  1.2.10.2  tls 
    971  1.2.10.2  tls /*
    972  1.2.10.2  tls  * Check to see if the socket is active.
    973  1.2.10.2  tls  * Return 1 if the reply has been received/acknowledged by the client,
    974  1.2.10.2  tls  * 0 otherwise.
    975  1.2.10.2  tls  * XXX - Uses tcp internals.
    976  1.2.10.2  tls  */
    977  1.2.10.2  tls static int
    978  1.2.10.2  tls nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t cur_sockref,
    979  1.2.10.2  tls     struct socket *cur_so)
    980  1.2.10.2  tls {
    981  1.2.10.2  tls 	int ret = 0;
    982  1.2.10.2  tls 
    983  1.2.10.2  tls 	if (!(rp->rc_flag & RC_TCPSEQ))
    984  1.2.10.2  tls 		return (ret);
    985  1.2.10.2  tls 	/*
    986  1.2.10.2  tls 	 * If the sockref is the same, it is the same TCP connection.
    987  1.2.10.2  tls 	 */
    988  1.2.10.2  tls 	if (cur_sockref == rp->rc_sockref)
    989  1.2.10.2  tls 		ret = nfsrv_checksockseqnum(cur_so, rp->rc_tcpseq);
    990  1.2.10.2  tls 	return (ret);
    991  1.2.10.2  tls }
    992  1.2.10.2  tls 
    993  1.2.10.2  tls /*
    994  1.2.10.2  tls  * Calculate the length of the mbuf list and a checksum on the first up to
    995  1.2.10.2  tls  * NFSRVCACHE_CHECKLEN bytes.
    996  1.2.10.2  tls  */
    997  1.2.10.2  tls static int
    998  1.2.10.2  tls nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum)
    999  1.2.10.2  tls {
   1000  1.2.10.2  tls 	int len = 0, cklen;
   1001  1.2.10.2  tls 	mbuf_t m;
   1002  1.2.10.2  tls 
   1003  1.2.10.2  tls 	m = m1;
   1004  1.2.10.2  tls 	while (m) {
   1005  1.2.10.2  tls 		len += mbuf_len(m);
   1006  1.2.10.2  tls 		m = mbuf_next(m);
   1007  1.2.10.2  tls 	}
   1008  1.2.10.2  tls 	cklen = (len > NFSRVCACHE_CHECKLEN) ? NFSRVCACHE_CHECKLEN : len;
   1009  1.2.10.2  tls 	*cksum = in_cksum(m1, cklen);
   1010  1.2.10.2  tls 	return (len);
   1011  1.2.10.2  tls }
   1012  1.2.10.2  tls 
   1013  1.2.10.2  tls /*
   1014  1.2.10.2  tls  * Mark a TCP connection that is seeing retries. Should never happen for
   1015  1.2.10.2  tls  * NFSv4.
   1016  1.2.10.2  tls  */
   1017  1.2.10.2  tls static void
   1018  1.2.10.2  tls nfsrc_marksametcpconn(u_int64_t sockref)
   1019  1.2.10.2  tls {
   1020  1.2.10.2  tls }
   1021  1.2.10.2  tls 
   1022