Home | History | Annotate | Line # | Download | only in server
      1  1.5       rin /*	$NetBSD: nfs_nfsdcache.c,v 1.5 2024/07/05 04:31:52 rin Exp $	*/
      2  1.1  dholland /*-
      3  1.1  dholland  * Copyright (c) 1989, 1993
      4  1.1  dholland  *	The Regents of the University of California.  All rights reserved.
      5  1.1  dholland  *
      6  1.1  dholland  * This code is derived from software contributed to Berkeley by
      7  1.1  dholland  * Rick Macklem at The University of Guelph.
      8  1.1  dholland  *
      9  1.1  dholland  * Redistribution and use in source and binary forms, with or without
     10  1.1  dholland  * modification, are permitted provided that the following conditions
     11  1.1  dholland  * are met:
     12  1.1  dholland  * 1. Redistributions of source code must retain the above copyright
     13  1.1  dholland  *    notice, this list of conditions and the following disclaimer.
     14  1.1  dholland  * 2. Redistributions in binary form must reproduce the above copyright
     15  1.1  dholland  *    notice, this list of conditions and the following disclaimer in the
     16  1.1  dholland  *    documentation and/or other materials provided with the distribution.
     17  1.1  dholland  * 4. Neither the name of the University nor the names of its contributors
     18  1.1  dholland  *    may be used to endorse or promote products derived from this software
     19  1.1  dholland  *    without specific prior written permission.
     20  1.1  dholland  *
     21  1.1  dholland  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     22  1.1  dholland  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     23  1.1  dholland  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     24  1.1  dholland  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     25  1.1  dholland  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     26  1.1  dholland  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     27  1.1  dholland  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     28  1.1  dholland  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     29  1.1  dholland  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     30  1.1  dholland  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     31  1.1  dholland  * SUCH DAMAGE.
     32  1.1  dholland  *
     33  1.1  dholland  */
     34  1.1  dholland 
     35  1.1  dholland #include <sys/cdefs.h>
     36  1.3  pgoyette /* __FBSDID("FreeBSD: head/sys/fs/nfsserver/nfs_nfsdcache.c 304026 2016-08-12 22:44:59Z rmacklem "); */
     37  1.5       rin __RCSID("$NetBSD: nfs_nfsdcache.c,v 1.5 2024/07/05 04:31:52 rin Exp $");
     38  1.1  dholland 
     39  1.1  dholland /*
     40  1.1  dholland  * Here is the basic algorithm:
     41  1.1  dholland  * First, some design criteria I used:
     42  1.1  dholland  * - I think a false hit is more serious than a false miss
     43  1.1  dholland  * - A false hit for an RPC that has Op(s) that order via seqid# must be
     44  1.1  dholland  *   avoided at all cost
     45  1.1  dholland  * - A valid hit will probably happen a long time after the original reply
     46  1.1  dholland  *   and the TCP socket that the original request was received on will no
     47  1.1  dholland  *   longer be active
     48  1.1  dholland  *   (The long time delay implies to me that LRU is not appropriate.)
     49  1.1  dholland  * - The mechanism will satisfy the requirements of ordering Ops with seqid#s
     50  1.1  dholland  *   in them as well as minimizing the risk of redoing retried non-idempotent
     51  1.1  dholland  *   Ops.
     52  1.1  dholland  * Because it is biased towards avoiding false hits, multiple entries with
     53  1.1  dholland  * the same xid are to be expected, especially for the case of the entry
     54  1.1  dholland  * in the cache being related to a seqid# sequenced Op.
     55  1.1  dholland  *
     56  1.1  dholland  * The basic algorithm I'm about to code up:
     57  1.1  dholland  * - Null RPCs bypass the cache and are just done
     58  1.1  dholland  * For TCP
     59  1.1  dholland  * 	- key on <xid, NFS version> (as noted above, there can be several
     60  1.1  dholland  * 				     entries with the same key)
     61  1.1  dholland  * 	When a request arrives:
     62  1.1  dholland  * 		For all that match key
     63  1.1  dholland  * 		- if RPC# != OR request_size !=
     64  1.1  dholland  * 			- not a match with this one
     65  1.1  dholland  * 		- if NFSv4 and received on same TCP socket OR
     66  1.1  dholland  *			received on a TCP connection created before the
     67  1.1  dholland  *			entry was cached
     68  1.1  dholland  * 			- not a match with this one
     69  1.1  dholland  * 			(V2,3 clients might retry on same TCP socket)
     70  1.1  dholland  * 		- calculate checksum on first N bytes of NFS XDR
     71  1.1  dholland  * 		- if checksum !=
     72  1.1  dholland  * 			- not a match for this one
     73  1.1  dholland  * 		If any of the remaining ones that match has a
     74  1.1  dholland  * 			seqid_refcnt > 0
     75  1.1  dholland  * 			- not a match (go do RPC, using new cache entry)
     76  1.1  dholland  * 		If one match left
     77  1.1  dholland  * 			- a hit (reply from cache)
     78  1.1  dholland  * 		else
     79  1.1  dholland  * 			- miss (go do RPC, using new cache entry)
     80  1.1  dholland  *
     81  1.1  dholland  * 	During processing of NFSv4 request:
     82  1.1  dholland  * 		- set a flag when a non-idempotent Op is processed
     83  1.1  dholland  * 		- when an Op that uses a seqid# (Open,...) is processed
     84  1.1  dholland  * 			- if same seqid# as referenced entry in cache
     85  1.1  dholland  * 				- free new cache entry
     86  1.1  dholland  * 				- reply from referenced cache entry
     87  1.1  dholland  * 			  else if next seqid# in order
     88  1.1  dholland  * 				- free referenced cache entry
     89  1.1  dholland  * 				- increment seqid_refcnt on new cache entry
     90  1.1  dholland  * 				- set pointer from Openowner/Lockowner to
     91  1.1  dholland  * 					new cache entry (aka reference it)
     92  1.1  dholland  * 			  else if first seqid# in sequence
     93  1.1  dholland  * 				- increment seqid_refcnt on new cache entry
     94  1.1  dholland  * 				- set pointer from Openowner/Lockowner to
     95  1.1  dholland  * 					new cache entry (aka reference it)
     96  1.1  dholland  *
     97  1.1  dholland  * 	At end of RPC processing:
     98  1.1  dholland  * 		- if seqid_refcnt > 0 OR flagged non-idempotent on new
     99  1.1  dholland  * 			cache entry
    100  1.1  dholland  * 			- save reply in cache entry
    101  1.1  dholland  * 			- calculate checksum on first N bytes of NFS XDR
    102  1.1  dholland  * 				request
    103  1.1  dholland  * 			- note op and length of XDR request (in bytes)
    104  1.1  dholland  * 			- timestamp it
    105  1.1  dholland  * 		  else
    106  1.1  dholland  * 			- free new cache entry
    107  1.1  dholland  * 		- Send reply (noting info for socket activity check, below)
    108  1.1  dholland  *
    109  1.1  dholland  * 	For cache entries saved above:
    110  1.1  dholland  * 		- if saved since seqid_refcnt was > 0
    111  1.1  dholland  * 			- free when seqid_refcnt decrements to 0
    112  1.1  dholland  * 			  (when next one in sequence is processed above, or
    113  1.1  dholland  * 			   when Openowner/Lockowner is discarded)
    114  1.1  dholland  * 		  else { non-idempotent Op(s) }
    115  1.1  dholland  * 			- free when
    116  1.1  dholland  * 				- some further activity observed on same
    117  1.1  dholland  * 					socket
    118  1.1  dholland  * 				  (I'm not yet sure how I'm going to do
    119  1.1  dholland  * 				   this. Maybe look at the TCP connection
    120  1.1  dholland  * 				   to see if the send_tcp_sequence# is well
    121  1.1  dholland  * 				   past sent reply OR K additional RPCs
    122  1.1  dholland  * 				   replied on same socket OR?)
    123  1.1  dholland  * 			  OR
    124  1.1  dholland  * 				- when very old (hours, days, weeks?)
    125  1.1  dholland  *
    126  1.1  dholland  * For UDP (v2, 3 only), pretty much the old way:
    127  1.1  dholland  * - key on <xid, NFS version, RPC#, Client host ip#>
    128  1.1  dholland  *   (at most one entry for each key)
    129  1.1  dholland  *
    130  1.1  dholland  * When a Request arrives:
    131  1.1  dholland  * - if a match with entry via key
    132  1.1  dholland  * 	- if RPC marked In_progress
    133  1.1  dholland  * 		- discard request (don't send reply)
    134  1.1  dholland  * 	  else
    135  1.1  dholland  * 		- reply from cache
    136  1.1  dholland  * 		- timestamp cache entry
    137  1.1  dholland  *   else
    138  1.1  dholland  * 	- add entry to cache, marked In_progress
    139  1.1  dholland  * 	- do RPC
    140  1.1  dholland  * 	- when RPC done
    141  1.1  dholland  * 		- if RPC# non-idempotent
    142  1.1  dholland  * 			- mark entry Done (not In_progress)
    143  1.1  dholland  * 			- save reply
    144  1.1  dholland  * 			- timestamp cache entry
    145  1.1  dholland  * 		  else
    146  1.1  dholland  * 			- free cache entry
    147  1.1  dholland  * 		- send reply
    148  1.1  dholland  *
    149  1.1  dholland  * Later, entries with saved replies are free'd a short time (few minutes)
    150  1.1  dholland  * after reply sent (timestamp).
    151  1.1  dholland  * Reference: Chet Juszczak, "Improving the Performance and Correctness
    152  1.1  dholland  *		of an NFS Server", in Proc. Winter 1989 USENIX Conference,
    153  1.1  dholland  *		pages 53-63. San Diego, February 1989.
    154  1.1  dholland  *	 for the UDP case.
    155  1.1  dholland  * nfsrc_floodlevel is set to the allowable upper limit for saved replies
    156  1.1  dholland  *	for TCP. For V3, a reply won't be saved when the flood level is
    157  1.1  dholland  *	hit. For V4, the non-idempotent Op will return NFSERR_RESOURCE in
    158  1.1  dholland  *	that case. This level should be set high enough that this almost
    159  1.1  dholland  *	never happens.
    160  1.1  dholland  */
    161  1.1  dholland #ifndef APPLEKEXT
    162  1.4  pgoyette #include <fs/nfs/common/nfsport.h>
    163  1.1  dholland 
    164  1.3  pgoyette extern struct nfsstatsv1 nfsstatsv1;
    165  1.1  dholland extern struct mtx nfsrc_udpmtx;
    166  1.1  dholland extern struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
    167  1.3  pgoyette extern struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE];
    168  1.1  dholland int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0;
    169  1.1  dholland #endif	/* !APPLEKEXT */
    170  1.1  dholland 
    171  1.1  dholland SYSCTL_DECL(_vfs_nfsd);
    172  1.1  dholland 
    173  1.1  dholland static u_int	nfsrc_tcphighwater = 0;
    174  1.1  dholland static int
    175  1.1  dholland sysctl_tcphighwater(SYSCTL_HANDLER_ARGS)
    176  1.1  dholland {
    177  1.1  dholland 	int error, newhighwater;
    178  1.1  dholland 
    179  1.1  dholland 	newhighwater = nfsrc_tcphighwater;
    180  1.1  dholland 	error = sysctl_handle_int(oidp, &newhighwater, 0, req);
    181  1.1  dholland 	if (error != 0 || req->newptr == NULL)
    182  1.1  dholland 		return (error);
    183  1.1  dholland 	if (newhighwater < 0)
    184  1.1  dholland 		return (EINVAL);
    185  1.1  dholland 	if (newhighwater >= nfsrc_floodlevel)
    186  1.1  dholland 		nfsrc_floodlevel = newhighwater + newhighwater / 5;
    187  1.1  dholland 	nfsrc_tcphighwater = newhighwater;
    188  1.1  dholland 	return (0);
    189  1.1  dholland }
    190  1.1  dholland SYSCTL_PROC(_vfs_nfsd, OID_AUTO, tcphighwater, CTLTYPE_UINT | CTLFLAG_RW, 0,
    191  1.1  dholland     sizeof(nfsrc_tcphighwater), sysctl_tcphighwater, "IU",
    192  1.1  dholland     "High water mark for TCP cache entries");
    193  1.1  dholland 
/* Read/write tunable exported as udphighwater under _vfs_nfsd. */
static u_int	nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER;
SYSCTL_UINT(_vfs_nfsd, OID_AUTO, udphighwater, CTLFLAG_RW,
    &nfsrc_udphighwater, 0,
    "High water mark for UDP cache entries");
/*
 * Added to NFSD_MONOSEC to form rc_timestamp when a TCP reply is saved
 * or replayed (see nfsrvd_updatecache()).
 */
static u_int	nfsrc_tcptimeout = NFSRVCACHE_TCPTIMEOUT;
SYSCTL_UINT(_vfs_nfsd, OID_AUTO, tcpcachetimeo, CTLFLAG_RW,
    &nfsrc_tcptimeout, 0,
    "Timeout for TCP entries in the DRC");
/*
 * When zero, nfsrvd_updatecache() does not save TCP replies on the
 * strength of ND_SAVEREPLY alone.
 */
static u_int nfsrc_tcpnonidempotent = 1;
SYSCTL_UINT(_vfs_nfsd, OID_AUTO, cachetcp, CTLFLAG_RW,
    &nfsrc_tcpnonidempotent, 0,
    "Enable the DRC for NFS over TCP");

/* Incremented in nfsrc_getudp() each time a new UDP entry is inserted. */
static int nfsrc_udpcachesize = 0;
/* List of UDP entries; an entry is moved to the tail whenever it is used. */
static TAILQ_HEAD(, nfsrvcache) nfsrvudplru;
/* UDP hash buckets, selected via NFSRCUDPHASH(xid). */
static struct nfsrvhashhead nfsrvudphashtbl[NFSRVCACHE_HASHSIZE];
    210  1.1  dholland 
    211  1.1  dholland /*
    212  1.1  dholland  * and the reverse mapping from generic to Version 2 procedure numbers
    213  1.1  dholland  */
    214  1.1  dholland static int newnfsv2_procid[NFS_V3NPROCS] = {
    215  1.1  dholland 	NFSV2PROC_NULL,
    216  1.1  dholland 	NFSV2PROC_GETATTR,
    217  1.1  dholland 	NFSV2PROC_SETATTR,
    218  1.1  dholland 	NFSV2PROC_LOOKUP,
    219  1.1  dholland 	NFSV2PROC_NOOP,
    220  1.1  dholland 	NFSV2PROC_READLINK,
    221  1.1  dholland 	NFSV2PROC_READ,
    222  1.1  dholland 	NFSV2PROC_WRITE,
    223  1.1  dholland 	NFSV2PROC_CREATE,
    224  1.1  dholland 	NFSV2PROC_MKDIR,
    225  1.1  dholland 	NFSV2PROC_SYMLINK,
    226  1.1  dholland 	NFSV2PROC_CREATE,
    227  1.1  dholland 	NFSV2PROC_REMOVE,
    228  1.1  dholland 	NFSV2PROC_RMDIR,
    229  1.1  dholland 	NFSV2PROC_RENAME,
    230  1.1  dholland 	NFSV2PROC_LINK,
    231  1.1  dholland 	NFSV2PROC_READDIR,
    232  1.1  dholland 	NFSV2PROC_NOOP,
    233  1.1  dholland 	NFSV2PROC_STATFS,
    234  1.1  dholland 	NFSV2PROC_NOOP,
    235  1.1  dholland 	NFSV2PROC_NOOP,
    236  1.1  dholland 	NFSV2PROC_NOOP,
    237  1.1  dholland };
    238  1.1  dholland 
/*
 * Bucket index for an xid: adds the bits above bit 23 back into the value
 * before reducing modulo NFSRVCACHE_HASHSIZE.
 */
#define	nfsrc_hash(xid)	(((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE)
/* Address of the UDP hash bucket for an xid. */
#define	NFSRCUDPHASH(xid) \
	(&nfsrvudphashtbl[nfsrc_hash(xid)])
/* Address of the list head (tbl) of the nfsrchash_table bucket for an xid. */
#define	NFSRCHASH(xid) \
	(&nfsrchash_table[nfsrc_hash(xid)].tbl)
/*
 * Address of the whole nfsrcahash_table bucket (mutex and list).  Note that
 * nfsrvd_sentcache() passes rc_sockref, not an xid, as the argument.
 */
#define	NFSRCAHASH(xid) (&nfsrcahash_table[nfsrc_hash(xid)])
#define	TRUE	1
#define	FALSE	0
/* NOTE(review): presumably the "first N bytes" checksummed for TCP; not used in the lines visible here. */
#define	NFSRVCACHE_CHECKLEN	100
    248  1.1  dholland 
    249  1.1  dholland /* True iff the rpc reply is an nfs status ONLY! */
    250  1.1  dholland static int nfsv2_repstat[NFS_V3NPROCS] = {
    251  1.1  dholland 	FALSE,
    252  1.1  dholland 	FALSE,
    253  1.1  dholland 	FALSE,
    254  1.1  dholland 	FALSE,
    255  1.1  dholland 	FALSE,
    256  1.1  dholland 	FALSE,
    257  1.1  dholland 	FALSE,
    258  1.1  dholland 	FALSE,
    259  1.1  dholland 	FALSE,
    260  1.1  dholland 	FALSE,
    261  1.1  dholland 	TRUE,
    262  1.1  dholland 	TRUE,
    263  1.1  dholland 	TRUE,
    264  1.1  dholland 	TRUE,
    265  1.1  dholland 	FALSE,
    266  1.1  dholland 	TRUE,
    267  1.1  dholland 	FALSE,
    268  1.1  dholland 	FALSE,
    269  1.1  dholland 	FALSE,
    270  1.1  dholland 	FALSE,
    271  1.1  dholland 	FALSE,
    272  1.1  dholland 	FALSE,
    273  1.1  dholland };
    274  1.1  dholland 
/*
 * Will NFS want to work over IPv6 someday?
 *
 * Address family of a cache entry: AF_INET6 when RC_INETIPV6 is set in
 * rc_flag, AF_INET otherwise.
 */
#define	NETFAMILY(rp) \
		(((rp)->rc_flag & RC_INETIPV6) ? AF_INET6 : AF_INET)
    280  1.1  dholland 
    281  1.1  dholland /* local functions */
    282  1.1  dholland static int nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
    283  1.1  dholland static int nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
    284  1.1  dholland static void nfsrc_lock(struct nfsrvcache *rp);
    285  1.1  dholland static void nfsrc_unlock(struct nfsrvcache *rp);
    286  1.1  dholland static void nfsrc_wanted(struct nfsrvcache *rp);
    287  1.1  dholland static void nfsrc_freecache(struct nfsrvcache *rp);
    288  1.1  dholland static int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum);
    289  1.1  dholland static void nfsrc_marksametcpconn(u_int64_t);
    290  1.1  dholland 
    291  1.1  dholland /*
    292  1.1  dholland  * Return the correct mutex for this cache entry.
    293  1.1  dholland  */
    294  1.1  dholland static __inline struct mtx *
    295  1.1  dholland nfsrc_cachemutex(struct nfsrvcache *rp)
    296  1.1  dholland {
    297  1.1  dholland 
    298  1.1  dholland 	if ((rp->rc_flag & RC_UDP) != 0)
    299  1.1  dholland 		return (&nfsrc_udpmtx);
    300  1.1  dholland 	return (&nfsrchash_table[nfsrc_hash(rp->rc_xid)].mtx);
    301  1.1  dholland }
    302  1.1  dholland 
    303  1.1  dholland /*
    304  1.1  dholland  * Initialize the server request cache list
    305  1.1  dholland  */
    306  1.1  dholland APPLESTATIC void
    307  1.1  dholland nfsrvd_initcache(void)
    308  1.1  dholland {
    309  1.1  dholland 	int i;
    310  1.1  dholland 	static int inited = 0;
    311  1.1  dholland 
    312  1.1  dholland 	if (inited)
    313  1.1  dholland 		return;
    314  1.1  dholland 	inited = 1;
    315  1.1  dholland 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
    316  1.1  dholland 		LIST_INIT(&nfsrvudphashtbl[i]);
    317  1.1  dholland 		LIST_INIT(&nfsrchash_table[i].tbl);
    318  1.3  pgoyette 		LIST_INIT(&nfsrcahash_table[i].tbl);
    319  1.1  dholland 	}
    320  1.1  dholland 	TAILQ_INIT(&nfsrvudplru);
    321  1.1  dholland 	nfsrc_tcpsavedreplies = 0;
    322  1.1  dholland 	nfsrc_udpcachesize = 0;
    323  1.3  pgoyette 	nfsstatsv1.srvcache_tcppeak = 0;
    324  1.3  pgoyette 	nfsstatsv1.srvcache_size = 0;
    325  1.1  dholland }
    326  1.1  dholland 
    327  1.1  dholland /*
    328  1.1  dholland  * Get a cache entry for this request. Basically just malloc a new one
    329  1.1  dholland  * and then call nfsrc_getudp() or nfsrc_gettcp() to do the rest.
    330  1.1  dholland  */
    331  1.1  dholland APPLESTATIC int
    332  1.3  pgoyette nfsrvd_getcache(struct nfsrv_descript *nd)
    333  1.1  dholland {
    334  1.1  dholland 	struct nfsrvcache *newrp;
    335  1.1  dholland 	int ret;
    336  1.1  dholland 
    337  1.1  dholland 	if (nd->nd_procnum == NFSPROC_NULL)
    338  1.1  dholland 		panic("nfsd cache null");
    339  1.1  dholland 	MALLOC(newrp, struct nfsrvcache *, sizeof (struct nfsrvcache),
    340  1.1  dholland 	    M_NFSRVCACHE, M_WAITOK);
    341  1.1  dholland 	NFSBZERO((caddr_t)newrp, sizeof (struct nfsrvcache));
    342  1.1  dholland 	if (nd->nd_flag & ND_NFSV4)
    343  1.1  dholland 		newrp->rc_flag = RC_NFSV4;
    344  1.1  dholland 	else if (nd->nd_flag & ND_NFSV3)
    345  1.1  dholland 		newrp->rc_flag = RC_NFSV3;
    346  1.1  dholland 	else
    347  1.1  dholland 		newrp->rc_flag = RC_NFSV2;
    348  1.1  dholland 	newrp->rc_xid = nd->nd_retxid;
    349  1.1  dholland 	newrp->rc_proc = nd->nd_procnum;
    350  1.1  dholland 	newrp->rc_sockref = nd->nd_sockref;
    351  1.1  dholland 	newrp->rc_cachetime = nd->nd_tcpconntime;
    352  1.1  dholland 	if (nd->nd_flag & ND_SAMETCPCONN)
    353  1.1  dholland 		newrp->rc_flag |= RC_SAMETCPCONN;
    354  1.1  dholland 	if (nd->nd_nam2 != NULL) {
    355  1.1  dholland 		newrp->rc_flag |= RC_UDP;
    356  1.1  dholland 		ret = nfsrc_getudp(nd, newrp);
    357  1.1  dholland 	} else {
    358  1.1  dholland 		ret = nfsrc_gettcp(nd, newrp);
    359  1.1  dholland 	}
    360  1.1  dholland 	NFSEXITCODE2(0, nd);
    361  1.1  dholland 	return (ret);
    362  1.1  dholland }
    363  1.1  dholland 
    364  1.1  dholland /*
    365  1.1  dholland  * For UDP (v2, v3):
    366  1.1  dholland  * - key on <xid, NFS version, RPC#, Client host ip#>
    367  1.1  dholland  *   (at most one entry for each key)
    368  1.1  dholland  */
    369  1.1  dholland static int
    370  1.1  dholland nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
    371  1.1  dholland {
    372  1.1  dholland 	struct nfsrvcache *rp;
    373  1.1  dholland 	struct sockaddr_in *saddr;
    374  1.1  dholland 	struct sockaddr_in6 *saddr6;
    375  1.1  dholland 	struct nfsrvhashhead *hp;
    376  1.1  dholland 	int ret = 0;
    377  1.1  dholland 	struct mtx *mutex;
    378  1.1  dholland 
    379  1.1  dholland 	mutex = nfsrc_cachemutex(newrp);
    380  1.1  dholland 	hp = NFSRCUDPHASH(newrp->rc_xid);
    381  1.1  dholland loop:
    382  1.1  dholland 	mtx_lock(mutex);
    383  1.1  dholland 	LIST_FOREACH(rp, hp, rc_hash) {
    384  1.1  dholland 	    if (newrp->rc_xid == rp->rc_xid &&
    385  1.1  dholland 		newrp->rc_proc == rp->rc_proc &&
    386  1.1  dholland 		(newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
    387  1.1  dholland 		nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
    388  1.1  dholland 			if ((rp->rc_flag & RC_LOCKED) != 0) {
    389  1.1  dholland 				rp->rc_flag |= RC_WANTED;
    390  1.1  dholland 				(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
    391  1.1  dholland 				    "nfsrc", 10 * hz);
    392  1.1  dholland 				goto loop;
    393  1.1  dholland 			}
    394  1.1  dholland 			if (rp->rc_flag == 0)
    395  1.1  dholland 				panic("nfs udp cache0");
    396  1.1  dholland 			rp->rc_flag |= RC_LOCKED;
    397  1.1  dholland 			TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
    398  1.1  dholland 			TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
    399  1.1  dholland 			if (rp->rc_flag & RC_INPROG) {
    400  1.3  pgoyette 				nfsstatsv1.srvcache_inproghits++;
    401  1.1  dholland 				mtx_unlock(mutex);
    402  1.1  dholland 				ret = RC_DROPIT;
    403  1.1  dholland 			} else if (rp->rc_flag & RC_REPSTATUS) {
    404  1.1  dholland 				/*
    405  1.1  dholland 				 * V2 only.
    406  1.1  dholland 				 */
    407  1.3  pgoyette 				nfsstatsv1.srvcache_nonidemdonehits++;
    408  1.1  dholland 				mtx_unlock(mutex);
    409  1.1  dholland 				nfsrvd_rephead(nd);
    410  1.1  dholland 				*(nd->nd_errp) = rp->rc_status;
    411  1.1  dholland 				ret = RC_REPLY;
    412  1.1  dholland 				rp->rc_timestamp = NFSD_MONOSEC +
    413  1.1  dholland 					NFSRVCACHE_UDPTIMEOUT;
    414  1.1  dholland 			} else if (rp->rc_flag & RC_REPMBUF) {
    415  1.3  pgoyette 				nfsstatsv1.srvcache_nonidemdonehits++;
    416  1.1  dholland 				mtx_unlock(mutex);
    417  1.1  dholland 				nd->nd_mreq = m_copym(rp->rc_reply, 0,
    418  1.1  dholland 					M_COPYALL, M_WAITOK);
    419  1.1  dholland 				ret = RC_REPLY;
    420  1.1  dholland 				rp->rc_timestamp = NFSD_MONOSEC +
    421  1.1  dholland 					NFSRVCACHE_UDPTIMEOUT;
    422  1.1  dholland 			} else {
    423  1.1  dholland 				panic("nfs udp cache1");
    424  1.1  dholland 			}
    425  1.1  dholland 			nfsrc_unlock(rp);
    426  1.1  dholland 			free((caddr_t)newrp, M_NFSRVCACHE);
    427  1.1  dholland 			goto out;
    428  1.1  dholland 		}
    429  1.1  dholland 	}
    430  1.3  pgoyette 	nfsstatsv1.srvcache_misses++;
    431  1.3  pgoyette 	atomic_add_int(&nfsstatsv1.srvcache_size, 1);
    432  1.1  dholland 	nfsrc_udpcachesize++;
    433  1.1  dholland 
    434  1.1  dholland 	newrp->rc_flag |= RC_INPROG;
    435  1.1  dholland 	saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
    436  1.1  dholland 	if (saddr->sin_family == AF_INET)
    437  1.1  dholland 		newrp->rc_inet = saddr->sin_addr.s_addr;
    438  1.1  dholland 	else if (saddr->sin_family == AF_INET6) {
    439  1.1  dholland 		saddr6 = (struct sockaddr_in6 *)saddr;
    440  1.1  dholland 		NFSBCOPY((caddr_t)&saddr6->sin6_addr, (caddr_t)&newrp->rc_inet6,
    441  1.1  dholland 		    sizeof (struct in6_addr));
    442  1.1  dholland 		newrp->rc_flag |= RC_INETIPV6;
    443  1.1  dholland 	}
    444  1.1  dholland 	LIST_INSERT_HEAD(hp, newrp, rc_hash);
    445  1.1  dholland 	TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru);
    446  1.1  dholland 	mtx_unlock(mutex);
    447  1.1  dholland 	nd->nd_rp = newrp;
    448  1.1  dholland 	ret = RC_DOIT;
    449  1.1  dholland 
    450  1.1  dholland out:
    451  1.1  dholland 	NFSEXITCODE2(0, nd);
    452  1.1  dholland 	return (ret);
    453  1.1  dholland }
    454  1.1  dholland 
    455  1.1  dholland /*
    456  1.1  dholland  * Update a request cache entry after the rpc has been done
    457  1.1  dholland  */
    458  1.1  dholland APPLESTATIC struct nfsrvcache *
    459  1.3  pgoyette nfsrvd_updatecache(struct nfsrv_descript *nd)
    460  1.1  dholland {
    461  1.1  dholland 	struct nfsrvcache *rp;
    462  1.1  dholland 	struct nfsrvcache *retrp = NULL;
    463  1.1  dholland 	mbuf_t m;
    464  1.1  dholland 	struct mtx *mutex;
    465  1.1  dholland 
    466  1.1  dholland 	rp = nd->nd_rp;
    467  1.1  dholland 	if (!rp)
    468  1.1  dholland 		panic("nfsrvd_updatecache null rp");
    469  1.1  dholland 	nd->nd_rp = NULL;
    470  1.1  dholland 	mutex = nfsrc_cachemutex(rp);
    471  1.1  dholland 	mtx_lock(mutex);
    472  1.1  dholland 	nfsrc_lock(rp);
    473  1.1  dholland 	if (!(rp->rc_flag & RC_INPROG))
    474  1.1  dholland 		panic("nfsrvd_updatecache not inprog");
    475  1.1  dholland 	rp->rc_flag &= ~RC_INPROG;
    476  1.1  dholland 	if (rp->rc_flag & RC_UDP) {
    477  1.1  dholland 		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
    478  1.1  dholland 		TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
    479  1.1  dholland 	}
    480  1.1  dholland 
    481  1.1  dholland 	/*
    482  1.1  dholland 	 * Reply from cache is a special case returned by nfsrv_checkseqid().
    483  1.1  dholland 	 */
    484  1.1  dholland 	if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) {
    485  1.3  pgoyette 		nfsstatsv1.srvcache_nonidemdonehits++;
    486  1.1  dholland 		mtx_unlock(mutex);
    487  1.1  dholland 		nd->nd_repstat = 0;
    488  1.5       rin 		mbuf_freem(nd->nd_mreq);
    489  1.1  dholland 		if (!(rp->rc_flag & RC_REPMBUF))
    490  1.1  dholland 			panic("reply from cache");
    491  1.1  dholland 		nd->nd_mreq = m_copym(rp->rc_reply, 0,
    492  1.1  dholland 		    M_COPYALL, M_WAITOK);
    493  1.1  dholland 		rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
    494  1.1  dholland 		nfsrc_unlock(rp);
    495  1.1  dholland 		goto out;
    496  1.1  dholland 	}
    497  1.1  dholland 
    498  1.1  dholland 	/*
    499  1.1  dholland 	 * If rc_refcnt > 0, save it
    500  1.1  dholland 	 * For UDP, save it if ND_SAVEREPLY is set
    501  1.1  dholland 	 * For TCP, save it if ND_SAVEREPLY and nfsrc_tcpnonidempotent is set
    502  1.1  dholland 	 */
    503  1.1  dholland 	if (nd->nd_repstat != NFSERR_DONTREPLY &&
    504  1.1  dholland 	    (rp->rc_refcnt > 0 ||
    505  1.1  dholland 	     ((nd->nd_flag & ND_SAVEREPLY) && (rp->rc_flag & RC_UDP)) ||
    506  1.1  dholland 	     ((nd->nd_flag & ND_SAVEREPLY) && !(rp->rc_flag & RC_UDP) &&
    507  1.1  dholland 	      nfsrc_tcpsavedreplies <= nfsrc_floodlevel &&
    508  1.1  dholland 	      nfsrc_tcpnonidempotent))) {
    509  1.1  dholland 		if (rp->rc_refcnt > 0) {
    510  1.1  dholland 			if (!(rp->rc_flag & RC_NFSV4))
    511  1.1  dholland 				panic("update_cache refcnt");
    512  1.1  dholland 			rp->rc_flag |= RC_REFCNT;
    513  1.1  dholland 		}
    514  1.1  dholland 		if ((nd->nd_flag & ND_NFSV2) &&
    515  1.1  dholland 		    nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) {
    516  1.1  dholland 			rp->rc_status = nd->nd_repstat;
    517  1.1  dholland 			rp->rc_flag |= RC_REPSTATUS;
    518  1.1  dholland 			mtx_unlock(mutex);
    519  1.1  dholland 		} else {
    520  1.1  dholland 			if (!(rp->rc_flag & RC_UDP)) {
    521  1.1  dholland 			    atomic_add_int(&nfsrc_tcpsavedreplies, 1);
    522  1.1  dholland 			    if (nfsrc_tcpsavedreplies >
    523  1.3  pgoyette 				nfsstatsv1.srvcache_tcppeak)
    524  1.3  pgoyette 				nfsstatsv1.srvcache_tcppeak =
    525  1.1  dholland 				    nfsrc_tcpsavedreplies;
    526  1.1  dholland 			}
    527  1.1  dholland 			mtx_unlock(mutex);
    528  1.1  dholland 			m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
    529  1.1  dholland 			mtx_lock(mutex);
    530  1.1  dholland 			rp->rc_reply = m;
    531  1.1  dholland 			rp->rc_flag |= RC_REPMBUF;
    532  1.1  dholland 			mtx_unlock(mutex);
    533  1.1  dholland 		}
    534  1.1  dholland 		if (rp->rc_flag & RC_UDP) {
    535  1.1  dholland 			rp->rc_timestamp = NFSD_MONOSEC +
    536  1.1  dholland 			    NFSRVCACHE_UDPTIMEOUT;
    537  1.1  dholland 			nfsrc_unlock(rp);
    538  1.1  dholland 		} else {
    539  1.1  dholland 			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
    540  1.1  dholland 			if (rp->rc_refcnt > 0)
    541  1.1  dholland 				nfsrc_unlock(rp);
    542  1.1  dholland 			else
    543  1.1  dholland 				retrp = rp;
    544  1.1  dholland 		}
    545  1.1  dholland 	} else {
    546  1.1  dholland 		nfsrc_freecache(rp);
    547  1.1  dholland 		mtx_unlock(mutex);
    548  1.1  dholland 	}
    549  1.1  dholland 
    550  1.1  dholland out:
    551  1.1  dholland 	NFSEXITCODE2(0, nd);
    552  1.1  dholland 	return (retrp);
    553  1.1  dholland }
    554  1.1  dholland 
    555  1.1  dholland /*
    556  1.1  dholland  * Invalidate and, if possible, free an in prog cache entry.
    557  1.1  dholland  * Must not sleep.
    558  1.1  dholland  */
    559  1.1  dholland APPLESTATIC void
    560  1.1  dholland nfsrvd_delcache(struct nfsrvcache *rp)
    561  1.1  dholland {
    562  1.1  dholland 	struct mtx *mutex;
    563  1.1  dholland 
    564  1.1  dholland 	mutex = nfsrc_cachemutex(rp);
    565  1.1  dholland 	if (!(rp->rc_flag & RC_INPROG))
    566  1.1  dholland 		panic("nfsrvd_delcache not in prog");
    567  1.1  dholland 	mtx_lock(mutex);
    568  1.1  dholland 	rp->rc_flag &= ~RC_INPROG;
    569  1.1  dholland 	if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED))
    570  1.1  dholland 		nfsrc_freecache(rp);
    571  1.1  dholland 	mtx_unlock(mutex);
    572  1.1  dholland }
    573  1.1  dholland 
    574  1.1  dholland /*
    575  1.1  dholland  * Called after nfsrvd_updatecache() once the reply is sent, to update
    576  1.3  pgoyette  * the entry's sequence number and unlock it. The argument is
    577  1.1  dholland  * the pointer returned by nfsrvd_updatecache().
    578  1.1  dholland  */
    579  1.1  dholland APPLESTATIC void
    580  1.3  pgoyette nfsrvd_sentcache(struct nfsrvcache *rp, int have_seq, uint32_t seq)
    581  1.1  dholland {
    582  1.3  pgoyette 	struct nfsrchash_bucket *hbp;
    583  1.1  dholland 
    584  1.3  pgoyette 	KASSERT(rp->rc_flag & RC_LOCKED, ("nfsrvd_sentcache not locked"));
    585  1.3  pgoyette 	if (have_seq) {
    586  1.3  pgoyette 		hbp = NFSRCAHASH(rp->rc_sockref);
    587  1.3  pgoyette 		mtx_lock(&hbp->mtx);
    588  1.3  pgoyette 		rp->rc_tcpseq = seq;
    589  1.3  pgoyette 		if (rp->rc_acked != RC_NO_ACK)
    590  1.3  pgoyette 			LIST_INSERT_HEAD(&hbp->tbl, rp, rc_ahash);
    591  1.3  pgoyette 		rp->rc_acked = RC_NO_ACK;
    592  1.3  pgoyette 		mtx_unlock(&hbp->mtx);
    593  1.1  dholland 	}
    594  1.1  dholland 	nfsrc_unlock(rp);
    595  1.1  dholland }
    596  1.1  dholland 
    597  1.1  dholland /*
    598  1.1  dholland  * Get a cache entry for TCP
    599  1.1  dholland  * - key on <xid, nfs version>
    600  1.1  dholland  *   (allow multiple entries for a given key)
    601  1.1  dholland  */
    602  1.1  dholland static int
    603  1.1  dholland nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
    604  1.1  dholland {
    605  1.1  dholland 	struct nfsrvcache *rp, *nextrp;
    606  1.1  dholland 	int i;
    607  1.1  dholland 	struct nfsrvcache *hitrp;
    608  1.1  dholland 	struct nfsrvhashhead *hp, nfsrc_templist;
    609  1.1  dholland 	int hit, ret = 0;
    610  1.1  dholland 	struct mtx *mutex;
    611  1.1  dholland 
    612  1.1  dholland 	mutex = nfsrc_cachemutex(newrp);
    613  1.1  dholland 	hp = NFSRCHASH(newrp->rc_xid);
    614  1.1  dholland 	newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum);
    615  1.1  dholland tryagain:
    616  1.1  dholland 	mtx_lock(mutex);
    617  1.1  dholland 	hit = 1;
    618  1.1  dholland 	LIST_INIT(&nfsrc_templist);
    619  1.1  dholland 	/*
    620  1.1  dholland 	 * Get all the matches and put them on the temp list.
    621  1.1  dholland 	 */
    622  1.1  dholland 	rp = LIST_FIRST(hp);
    623  1.2  christos 	while (rp != NULL) {
    624  1.1  dholland 		nextrp = LIST_NEXT(rp, rc_hash);
    625  1.1  dholland 		if (newrp->rc_xid == rp->rc_xid &&
    626  1.1  dholland 		    (!(rp->rc_flag & RC_INPROG) ||
    627  1.1  dholland 		     ((newrp->rc_flag & RC_SAMETCPCONN) &&
    628  1.1  dholland 		      newrp->rc_sockref == rp->rc_sockref)) &&
    629  1.1  dholland 		    (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
    630  1.1  dholland 		    newrp->rc_proc == rp->rc_proc &&
    631  1.1  dholland 		    ((newrp->rc_flag & RC_NFSV4) &&
    632  1.1  dholland 		     newrp->rc_sockref != rp->rc_sockref &&
    633  1.1  dholland 		     newrp->rc_cachetime >= rp->rc_cachetime)
    634  1.1  dholland 		    && newrp->rc_reqlen == rp->rc_reqlen &&
    635  1.1  dholland 		    newrp->rc_cksum == rp->rc_cksum) {
    636  1.1  dholland 			LIST_REMOVE(rp, rc_hash);
    637  1.1  dholland 			LIST_INSERT_HEAD(&nfsrc_templist, rp, rc_hash);
    638  1.1  dholland 		}
    639  1.1  dholland 		rp = nextrp;
    640  1.1  dholland 	}
    641  1.1  dholland 
    642  1.1  dholland 	/*
    643  1.1  dholland 	 * Now, use nfsrc_templist to decide if there is a match.
    644  1.1  dholland 	 */
    645  1.1  dholland 	i = 0;
    646  1.1  dholland 	LIST_FOREACH(rp, &nfsrc_templist, rc_hash) {
    647  1.1  dholland 		i++;
    648  1.1  dholland 		if (rp->rc_refcnt > 0) {
    649  1.1  dholland 			hit = 0;
    650  1.1  dholland 			break;
    651  1.1  dholland 		}
    652  1.1  dholland 	}
    653  1.1  dholland 	/*
    654  1.1  dholland 	 * Can be a hit only if one entry left.
    655  1.1  dholland 	 * Note possible hit entry and put nfsrc_templist back on hash
    656  1.1  dholland 	 * list.
    657  1.1  dholland 	 */
    658  1.1  dholland 	if (i != 1)
    659  1.1  dholland 		hit = 0;
    660  1.1  dholland 	hitrp = rp = LIST_FIRST(&nfsrc_templist);
    661  1.2  christos 	while (rp != NULL) {
    662  1.1  dholland 		nextrp = LIST_NEXT(rp, rc_hash);
    663  1.1  dholland 		LIST_REMOVE(rp, rc_hash);
    664  1.1  dholland 		LIST_INSERT_HEAD(hp, rp, rc_hash);
    665  1.1  dholland 		rp = nextrp;
    666  1.1  dholland 	}
    667  1.2  christos 	if (LIST_FIRST(&nfsrc_templist) != NULL)
    668  1.1  dholland 		panic("nfs gettcp cache templist");
    669  1.1  dholland 
    670  1.1  dholland 	if (hit) {
    671  1.1  dholland 		rp = hitrp;
    672  1.1  dholland 		if ((rp->rc_flag & RC_LOCKED) != 0) {
    673  1.1  dholland 			rp->rc_flag |= RC_WANTED;
    674  1.1  dholland 			(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
    675  1.1  dholland 			    "nfsrc", 10 * hz);
    676  1.1  dholland 			goto tryagain;
    677  1.1  dholland 		}
    678  1.1  dholland 		if (rp->rc_flag == 0)
    679  1.1  dholland 			panic("nfs tcp cache0");
    680  1.1  dholland 		rp->rc_flag |= RC_LOCKED;
    681  1.1  dholland 		if (rp->rc_flag & RC_INPROG) {
    682  1.3  pgoyette 			nfsstatsv1.srvcache_inproghits++;
    683  1.1  dholland 			mtx_unlock(mutex);
    684  1.1  dholland 			if (newrp->rc_sockref == rp->rc_sockref)
    685  1.1  dholland 				nfsrc_marksametcpconn(rp->rc_sockref);
    686  1.1  dholland 			ret = RC_DROPIT;
    687  1.1  dholland 		} else if (rp->rc_flag & RC_REPSTATUS) {
    688  1.1  dholland 			/*
    689  1.1  dholland 			 * V2 only.
    690  1.1  dholland 			 */
    691  1.3  pgoyette 			nfsstatsv1.srvcache_nonidemdonehits++;
    692  1.1  dholland 			mtx_unlock(mutex);
    693  1.1  dholland 			if (newrp->rc_sockref == rp->rc_sockref)
    694  1.1  dholland 				nfsrc_marksametcpconn(rp->rc_sockref);
    695  1.1  dholland 			ret = RC_REPLY;
    696  1.1  dholland 			nfsrvd_rephead(nd);
    697  1.1  dholland 			*(nd->nd_errp) = rp->rc_status;
    698  1.1  dholland 			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
    699  1.1  dholland 		} else if (rp->rc_flag & RC_REPMBUF) {
    700  1.3  pgoyette 			nfsstatsv1.srvcache_nonidemdonehits++;
    701  1.1  dholland 			mtx_unlock(mutex);
    702  1.1  dholland 			if (newrp->rc_sockref == rp->rc_sockref)
    703  1.1  dholland 				nfsrc_marksametcpconn(rp->rc_sockref);
    704  1.1  dholland 			ret = RC_REPLY;
    705  1.1  dholland 			nd->nd_mreq = m_copym(rp->rc_reply, 0,
    706  1.1  dholland 				M_COPYALL, M_WAITOK);
    707  1.1  dholland 			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
    708  1.1  dholland 		} else {
    709  1.1  dholland 			panic("nfs tcp cache1");
    710  1.1  dholland 		}
    711  1.1  dholland 		nfsrc_unlock(rp);
    712  1.1  dholland 		free((caddr_t)newrp, M_NFSRVCACHE);
    713  1.1  dholland 		goto out;
    714  1.1  dholland 	}
    715  1.3  pgoyette 	nfsstatsv1.srvcache_misses++;
    716  1.3  pgoyette 	atomic_add_int(&nfsstatsv1.srvcache_size, 1);
    717  1.1  dholland 
    718  1.1  dholland 	/*
    719  1.1  dholland 	 * For TCP, multiple entries for a key are allowed, so don't
    720  1.1  dholland 	 * chain it into the hash table until done.
    721  1.1  dholland 	 */
    722  1.1  dholland 	newrp->rc_cachetime = NFSD_MONOSEC;
    723  1.1  dholland 	newrp->rc_flag |= RC_INPROG;
    724  1.1  dholland 	LIST_INSERT_HEAD(hp, newrp, rc_hash);
    725  1.1  dholland 	mtx_unlock(mutex);
    726  1.1  dholland 	nd->nd_rp = newrp;
    727  1.1  dholland 	ret = RC_DOIT;
    728  1.1  dholland 
    729  1.1  dholland out:
    730  1.1  dholland 	NFSEXITCODE2(0, nd);
    731  1.1  dholland 	return (ret);
    732  1.1  dholland }
    733  1.1  dholland 
    734  1.1  dholland /*
    735  1.1  dholland  * Lock a cache entry.
    736  1.1  dholland  */
    737  1.1  dholland static void
    738  1.1  dholland nfsrc_lock(struct nfsrvcache *rp)
    739  1.1  dholland {
    740  1.1  dholland 	struct mtx *mutex;
    741  1.1  dholland 
    742  1.1  dholland 	mutex = nfsrc_cachemutex(rp);
    743  1.1  dholland 	mtx_assert(mutex, MA_OWNED);
    744  1.1  dholland 	while ((rp->rc_flag & RC_LOCKED) != 0) {
    745  1.1  dholland 		rp->rc_flag |= RC_WANTED;
    746  1.1  dholland 		(void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0);
    747  1.1  dholland 	}
    748  1.1  dholland 	rp->rc_flag |= RC_LOCKED;
    749  1.1  dholland }
    750  1.1  dholland 
    751  1.1  dholland /*
    752  1.1  dholland  * Unlock a cache entry.
    753  1.1  dholland  */
    754  1.1  dholland static void
    755  1.1  dholland nfsrc_unlock(struct nfsrvcache *rp)
    756  1.1  dholland {
    757  1.1  dholland 	struct mtx *mutex;
    758  1.1  dholland 
    759  1.1  dholland 	mutex = nfsrc_cachemutex(rp);
    760  1.1  dholland 	mtx_lock(mutex);
    761  1.1  dholland 	rp->rc_flag &= ~RC_LOCKED;
    762  1.1  dholland 	nfsrc_wanted(rp);
    763  1.1  dholland 	mtx_unlock(mutex);
    764  1.1  dholland }
    765  1.1  dholland 
    766  1.1  dholland /*
    767  1.1  dholland  * Wakeup anyone wanting entry.
    768  1.1  dholland  */
    769  1.1  dholland static void
    770  1.1  dholland nfsrc_wanted(struct nfsrvcache *rp)
    771  1.1  dholland {
    772  1.1  dholland 	if (rp->rc_flag & RC_WANTED) {
    773  1.1  dholland 		rp->rc_flag &= ~RC_WANTED;
    774  1.1  dholland 		wakeup((caddr_t)rp);
    775  1.1  dholland 	}
    776  1.1  dholland }
    777  1.1  dholland 
    778  1.1  dholland /*
    779  1.1  dholland  * Free up the entry.
    780  1.1  dholland  * Must not sleep.
    781  1.1  dholland  */
    782  1.1  dholland static void
    783  1.1  dholland nfsrc_freecache(struct nfsrvcache *rp)
    784  1.1  dholland {
    785  1.3  pgoyette 	struct nfsrchash_bucket *hbp;
    786  1.1  dholland 
    787  1.1  dholland 	LIST_REMOVE(rp, rc_hash);
    788  1.1  dholland 	if (rp->rc_flag & RC_UDP) {
    789  1.1  dholland 		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
    790  1.1  dholland 		nfsrc_udpcachesize--;
    791  1.3  pgoyette 	} else if (rp->rc_acked != RC_NO_SEQ) {
    792  1.3  pgoyette 		hbp = NFSRCAHASH(rp->rc_sockref);
    793  1.3  pgoyette 		mtx_lock(&hbp->mtx);
    794  1.3  pgoyette 		if (rp->rc_acked == RC_NO_ACK)
    795  1.3  pgoyette 			LIST_REMOVE(rp, rc_ahash);
    796  1.3  pgoyette 		mtx_unlock(&hbp->mtx);
    797  1.1  dholland 	}
    798  1.1  dholland 	nfsrc_wanted(rp);
    799  1.1  dholland 	if (rp->rc_flag & RC_REPMBUF) {
    800  1.1  dholland 		mbuf_freem(rp->rc_reply);
    801  1.1  dholland 		if (!(rp->rc_flag & RC_UDP))
    802  1.1  dholland 			atomic_add_int(&nfsrc_tcpsavedreplies, -1);
    803  1.1  dholland 	}
    804  1.1  dholland 	FREE((caddr_t)rp, M_NFSRVCACHE);
    805  1.3  pgoyette 	atomic_add_int(&nfsstatsv1.srvcache_size, -1);
    806  1.1  dholland }
    807  1.1  dholland 
    808  1.1  dholland /*
    809  1.1  dholland  * Clean out the cache. Called when nfsserver module is unloaded.
    810  1.1  dholland  */
    811  1.1  dholland APPLESTATIC void
    812  1.1  dholland nfsrvd_cleancache(void)
    813  1.1  dholland {
    814  1.1  dholland 	struct nfsrvcache *rp, *nextrp;
    815  1.1  dholland 	int i;
    816  1.1  dholland 
    817  1.1  dholland 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
    818  1.1  dholland 		mtx_lock(&nfsrchash_table[i].mtx);
    819  1.1  dholland 		LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, nextrp)
    820  1.1  dholland 			nfsrc_freecache(rp);
    821  1.1  dholland 		mtx_unlock(&nfsrchash_table[i].mtx);
    822  1.1  dholland 	}
    823  1.1  dholland 	mtx_lock(&nfsrc_udpmtx);
    824  1.1  dholland 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
    825  1.1  dholland 		LIST_FOREACH_SAFE(rp, &nfsrvudphashtbl[i], rc_hash, nextrp) {
    826  1.1  dholland 			nfsrc_freecache(rp);
    827  1.1  dholland 		}
    828  1.1  dholland 	}
    829  1.3  pgoyette 	nfsstatsv1.srvcache_size = 0;
    830  1.1  dholland 	mtx_unlock(&nfsrc_udpmtx);
    831  1.1  dholland 	nfsrc_tcpsavedreplies = 0;
    832  1.1  dholland }
    833  1.1  dholland 
    834  1.3  pgoyette #define HISTSIZE	16
    835  1.1  dholland /*
    836  1.1  dholland  * The basic rule is to get rid of entries that are expired.
    837  1.1  dholland  */
    838  1.3  pgoyette void
    839  1.3  pgoyette nfsrc_trimcache(u_int64_t sockref, uint32_t snd_una, int final)
    840  1.1  dholland {
    841  1.3  pgoyette 	struct nfsrchash_bucket *hbp;
    842  1.1  dholland 	struct nfsrvcache *rp, *nextrp;
    843  1.3  pgoyette 	int force, lastslot, i, j, k, tto, time_histo[HISTSIZE];
    844  1.1  dholland 	time_t thisstamp;
    845  1.1  dholland 	static time_t udp_lasttrim = 0, tcp_lasttrim = 0;
    846  1.3  pgoyette 	static int onethread = 0, oneslot = 0;
    847  1.3  pgoyette 
    848  1.3  pgoyette 	if (sockref != 0) {
    849  1.3  pgoyette 		hbp = NFSRCAHASH(sockref);
    850  1.3  pgoyette 		mtx_lock(&hbp->mtx);
    851  1.3  pgoyette 		LIST_FOREACH_SAFE(rp, &hbp->tbl, rc_ahash, nextrp) {
    852  1.3  pgoyette 			if (sockref == rp->rc_sockref) {
    853  1.3  pgoyette 				if (SEQ_GEQ(snd_una, rp->rc_tcpseq)) {
    854  1.3  pgoyette 					rp->rc_acked = RC_ACK;
    855  1.3  pgoyette 					LIST_REMOVE(rp, rc_ahash);
    856  1.3  pgoyette 				} else if (final) {
    857  1.3  pgoyette 					rp->rc_acked = RC_NACK;
    858  1.3  pgoyette 					LIST_REMOVE(rp, rc_ahash);
    859  1.3  pgoyette 				}
    860  1.3  pgoyette 			}
    861  1.3  pgoyette 		}
    862  1.3  pgoyette 		mtx_unlock(&hbp->mtx);
    863  1.3  pgoyette 	}
    864  1.1  dholland 
    865  1.1  dholland 	if (atomic_cmpset_acq_int(&onethread, 0, 1) == 0)
    866  1.1  dholland 		return;
    867  1.1  dholland 	if (NFSD_MONOSEC != udp_lasttrim ||
    868  1.1  dholland 	    nfsrc_udpcachesize >= (nfsrc_udphighwater +
    869  1.1  dholland 	    nfsrc_udphighwater / 2)) {
    870  1.1  dholland 		mtx_lock(&nfsrc_udpmtx);
    871  1.1  dholland 		udp_lasttrim = NFSD_MONOSEC;
    872  1.1  dholland 		TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) {
    873  1.1  dholland 			if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
    874  1.1  dholland 			     && rp->rc_refcnt == 0
    875  1.1  dholland 			     && ((rp->rc_flag & RC_REFCNT) ||
    876  1.1  dholland 				 udp_lasttrim > rp->rc_timestamp ||
    877  1.1  dholland 				 nfsrc_udpcachesize > nfsrc_udphighwater))
    878  1.1  dholland 				nfsrc_freecache(rp);
    879  1.1  dholland 		}
    880  1.1  dholland 		mtx_unlock(&nfsrc_udpmtx);
    881  1.1  dholland 	}
    882  1.1  dholland 	if (NFSD_MONOSEC != tcp_lasttrim ||
    883  1.1  dholland 	    nfsrc_tcpsavedreplies >= nfsrc_tcphighwater) {
    884  1.3  pgoyette 		force = nfsrc_tcphighwater / 4;
    885  1.3  pgoyette 		if (force > 0 &&
    886  1.3  pgoyette 		    nfsrc_tcpsavedreplies + force >= nfsrc_tcphighwater) {
    887  1.3  pgoyette 			for (i = 0; i < HISTSIZE; i++)
    888  1.3  pgoyette 				time_histo[i] = 0;
    889  1.3  pgoyette 			i = 0;
    890  1.3  pgoyette 			lastslot = NFSRVCACHE_HASHSIZE - 1;
    891  1.3  pgoyette 		} else {
    892  1.3  pgoyette 			force = 0;
    893  1.3  pgoyette 			if (NFSD_MONOSEC != tcp_lasttrim) {
    894  1.3  pgoyette 				i = 0;
    895  1.3  pgoyette 				lastslot = NFSRVCACHE_HASHSIZE - 1;
    896  1.3  pgoyette 			} else {
    897  1.3  pgoyette 				lastslot = i = oneslot;
    898  1.3  pgoyette 				if (++oneslot >= NFSRVCACHE_HASHSIZE)
    899  1.3  pgoyette 					oneslot = 0;
    900  1.3  pgoyette 			}
    901  1.3  pgoyette 		}
    902  1.3  pgoyette 		tto = nfsrc_tcptimeout;
    903  1.3  pgoyette 		tcp_lasttrim = NFSD_MONOSEC;
    904  1.3  pgoyette 		for (; i <= lastslot; i++) {
    905  1.1  dholland 			mtx_lock(&nfsrchash_table[i].mtx);
    906  1.1  dholland 			LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash,
    907  1.1  dholland 			    nextrp) {
    908  1.1  dholland 				if (!(rp->rc_flag &
    909  1.1  dholland 				     (RC_INPROG|RC_LOCKED|RC_WANTED))
    910  1.1  dholland 				     && rp->rc_refcnt == 0) {
    911  1.3  pgoyette 					if ((rp->rc_flag & RC_REFCNT) ||
    912  1.3  pgoyette 					    tcp_lasttrim > rp->rc_timestamp ||
    913  1.3  pgoyette 					    rp->rc_acked == RC_ACK) {
    914  1.3  pgoyette 						nfsrc_freecache(rp);
    915  1.3  pgoyette 						continue;
    916  1.3  pgoyette 					}
    917  1.3  pgoyette 
    918  1.3  pgoyette 					if (force == 0)
    919  1.3  pgoyette 						continue;
    920  1.1  dholland 					/*
    921  1.1  dholland 					 * The timestamps range from roughly the
    922  1.1  dholland 					 * present (tcp_lasttrim) to the present
    923  1.1  dholland 					 * + nfsrc_tcptimeout. Generate a simple
    924  1.1  dholland 					 * histogram of where the timeouts fall.
    925  1.1  dholland 					 */
    926  1.1  dholland 					j = rp->rc_timestamp - tcp_lasttrim;
    927  1.3  pgoyette 					if (j >= tto)
    928  1.3  pgoyette 						j = HISTSIZE - 1;
    929  1.3  pgoyette 					else if (j < 0)
    930  1.1  dholland 						j = 0;
    931  1.3  pgoyette 					else
    932  1.3  pgoyette 						j = j * HISTSIZE / tto;
    933  1.1  dholland 					time_histo[j]++;
    934  1.1  dholland 				}
    935  1.1  dholland 			}
    936  1.1  dholland 			mtx_unlock(&nfsrchash_table[i].mtx);
    937  1.1  dholland 		}
    938  1.3  pgoyette 		if (force) {
    939  1.1  dholland 			/*
    940  1.1  dholland 			 * Trim some more with a smaller timeout of as little
    941  1.1  dholland 			 * as 20% of nfsrc_tcptimeout to try and get below
    942  1.1  dholland 			 * 80% of the nfsrc_tcphighwater.
    943  1.1  dholland 			 */
    944  1.1  dholland 			k = 0;
    945  1.3  pgoyette 			for (i = 0; i < (HISTSIZE - 2); i++) {
    946  1.1  dholland 				k += time_histo[i];
    947  1.3  pgoyette 				if (k > force)
    948  1.1  dholland 					break;
    949  1.1  dholland 			}
    950  1.3  pgoyette 			k = tto * (i + 1) / HISTSIZE;
    951  1.1  dholland 			if (k < 1)
    952  1.1  dholland 				k = 1;
    953  1.1  dholland 			thisstamp = tcp_lasttrim + k;
    954  1.1  dholland 			for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
    955  1.1  dholland 				mtx_lock(&nfsrchash_table[i].mtx);
    956  1.1  dholland 				LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl,
    957  1.1  dholland 				    rc_hash, nextrp) {
    958  1.1  dholland 					if (!(rp->rc_flag &
    959  1.1  dholland 					     (RC_INPROG|RC_LOCKED|RC_WANTED))
    960  1.1  dholland 					     && rp->rc_refcnt == 0
    961  1.1  dholland 					     && ((rp->rc_flag & RC_REFCNT) ||
    962  1.1  dholland 						 thisstamp > rp->rc_timestamp ||
    963  1.3  pgoyette 						 rp->rc_acked == RC_ACK))
    964  1.1  dholland 						nfsrc_freecache(rp);
    965  1.1  dholland 				}
    966  1.1  dholland 				mtx_unlock(&nfsrchash_table[i].mtx);
    967  1.1  dholland 			}
    968  1.1  dholland 		}
    969  1.1  dholland 	}
    970  1.1  dholland 	atomic_store_rel_int(&onethread, 0);
    971  1.1  dholland }
    972  1.1  dholland 
    973  1.1  dholland /*
    974  1.1  dholland  * Add a seqid# reference to the cache entry.
    975  1.1  dholland  */
    976  1.1  dholland APPLESTATIC void
    977  1.1  dholland nfsrvd_refcache(struct nfsrvcache *rp)
    978  1.1  dholland {
    979  1.1  dholland 	struct mtx *mutex;
    980  1.1  dholland 
    981  1.3  pgoyette 	if (rp == NULL)
    982  1.3  pgoyette 		/* For NFSv4.1, there is no cache entry. */
    983  1.3  pgoyette 		return;
    984  1.1  dholland 	mutex = nfsrc_cachemutex(rp);
    985  1.1  dholland 	mtx_lock(mutex);
    986  1.1  dholland 	if (rp->rc_refcnt < 0)
    987  1.1  dholland 		panic("nfs cache refcnt");
    988  1.1  dholland 	rp->rc_refcnt++;
    989  1.1  dholland 	mtx_unlock(mutex);
    990  1.1  dholland }
    991  1.1  dholland 
    992  1.1  dholland /*
    993  1.1  dholland  * Dereference a seqid# cache entry.
    994  1.1  dholland  */
    995  1.1  dholland APPLESTATIC void
    996  1.1  dholland nfsrvd_derefcache(struct nfsrvcache *rp)
    997  1.1  dholland {
    998  1.1  dholland 	struct mtx *mutex;
    999  1.1  dholland 
   1000  1.1  dholland 	mutex = nfsrc_cachemutex(rp);
   1001  1.1  dholland 	mtx_lock(mutex);
   1002  1.1  dholland 	if (rp->rc_refcnt <= 0)
   1003  1.1  dholland 		panic("nfs cache derefcnt");
   1004  1.1  dholland 	rp->rc_refcnt--;
   1005  1.1  dholland 	if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG)))
   1006  1.1  dholland 		nfsrc_freecache(rp);
   1007  1.1  dholland 	mtx_unlock(mutex);
   1008  1.1  dholland }
   1009  1.1  dholland 
   1010  1.1  dholland /*
   1011  1.1  dholland  * Calculate the length of the mbuf list and a checksum on the first up to
   1012  1.1  dholland  * NFSRVCACHE_CHECKLEN bytes.
   1013  1.1  dholland  */
   1014  1.1  dholland static int
   1015  1.1  dholland nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum)
   1016  1.1  dholland {
   1017  1.1  dholland 	int len = 0, cklen;
   1018  1.1  dholland 	mbuf_t m;
   1019  1.1  dholland 
   1020  1.1  dholland 	m = m1;
   1021  1.1  dholland 	while (m) {
   1022  1.1  dholland 		len += mbuf_len(m);
   1023  1.1  dholland 		m = mbuf_next(m);
   1024  1.1  dholland 	}
   1025  1.1  dholland 	cklen = (len > NFSRVCACHE_CHECKLEN) ? NFSRVCACHE_CHECKLEN : len;
   1026  1.1  dholland 	*cksum = in_cksum(m1, cklen);
   1027  1.1  dholland 	return (len);
   1028  1.1  dholland }
   1029  1.1  dholland 
   1030  1.1  dholland /*
   1031  1.1  dholland  * Mark a TCP connection that is seeing retries. Should never happen for
   1032  1.1  dholland  * NFSv4.
   1033  1.1  dholland  */
   1034  1.1  dholland static void
   1035  1.1  dholland nfsrc_marksametcpconn(u_int64_t sockref)
   1036  1.1  dholland {
   1037  1.1  dholland }
   1038  1.1  dholland 
   1039