/*	$NetBSD: nfs_nfsdcache.c,v 1.2.4.2 2014/05/18 17:46:06 rmind Exp $	*/
/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
/* __FBSDID("FreeBSD: head/sys/fs/nfsserver/nfs_nfsdcache.c 254337 2013-08-14 21:11:26Z rmacklem "); */
__RCSID("$NetBSD: nfs_nfsdcache.c,v 1.2.4.2 2014/05/18 17:46:06 rmind Exp $");

/*
 * Here is the basic algorithm:
 * First, some design criteria I used:
 * - I think a false hit is more serious than a false miss
 * - A false hit for an RPC that has Op(s) that order via seqid# must be
 *   avoided at all cost
 * - A valid hit will probably happen a long time after the original reply
 *   and the TCP socket that the original request was received on will no
 *   longer be active
 *   (The long time delay implies to me that LRU is not appropriate.)
 * - The mechanism will satisfy the requirements of ordering Ops with seqid#s
 *   in them as well as minimizing the risk of redoing retried non-idempotent
 *   Ops.
 * Because it is biased towards avoiding false hits, multiple entries with
 * the same xid are to be expected, especially for the case of the entry
 * in the cache being related to a seqid# sequenced Op.
 *
 * The basic algorithm I'm about to code up:
 * - Null RPCs bypass the cache and are just done
 * For TCP
 * - key on <xid, NFS version> (as noted above, there can be several
 *   entries with the same key)
 * When a request arrives:
 * For all that match key
 *   - if RPC# != OR request_size !=
 *     - not a match with this one
 *   - if NFSv4 and received on same TCP socket OR
 *     received on a TCP connection created before the
 *     entry was cached
 *     - not a match with this one
 *     (V2,3 clients might retry on same TCP socket)
 *   - calculate checksum on first N bytes of NFS XDR
 *   - if checksum !=
 *     - not a match for this one
 * If any of the remaining ones that match has a
 *   seqid_refcnt > 0
 *   - not a match (go do RPC, using new cache entry)
 * If one match left
 *   - a hit (reply from cache)
 * else
 *   - miss (go do RPC, using new cache entry)
 *
 * During processing of NFSv4 request:
 * - set a flag when a non-idempotent Op is processed
 * - when an Op that uses a seqid# (Open,...) is processed
 *   - if same seqid# as referenced entry in cache
 *     - free new cache entry
 *     - reply from referenced cache entry
 *   else if next seqid# in order
 *     - free referenced cache entry
 *     - increment seqid_refcnt on new cache entry
 *     - set pointer from Openowner/Lockowner to
 *       new cache entry (aka reference it)
 *   else if first seqid# in sequence
 *     - increment seqid_refcnt on new cache entry
 *     - set pointer from Openowner/Lockowner to
 *       new cache entry (aka reference it)
 *
 * At end of RPC processing:
 * - if seqid_refcnt > 0 OR flagged non-idempotent on new
 *   cache entry
 *   - save reply in cache entry
 *   - calculate checksum on first N bytes of NFS XDR
 *     request
 *   - note op and length of XDR request (in bytes)
 *   - timestamp it
 * else
 *   - free new cache entry
 * - Send reply (noting info for socket activity check, below)
 *
 * For cache entries saved above:
 * - if saved since seqid_refcnt was > 0
 *   - free when seqid_refcnt decrements to 0
 *     (when next one in sequence is processed above, or
 *     when Openowner/Lockowner is discarded)
 * else { non-idempotent Op(s) }
 *   - free when
 *     - some further activity observed on same
 *       socket
 *       (I'm not yet sure how I'm going to do
 *       this. Maybe look at the TCP connection
 *       to see if the send_tcp_sequence# is well
 *       past sent reply OR K additional RPCs
 *       replied on same socket OR?)
 *     OR
 *     - when very old (hours, days, weeks?)
 *
 * For UDP (v2, 3 only), pretty much the old way:
 * - key on <xid, NFS version, RPC#, Client host ip#>
 *   (at most one entry for each key)
 *
 * When a Request arrives:
 * - if a match with entry via key
 *   - if RPC marked In_progress
 *     - discard request (don't send reply)
 *   else
 *     - reply from cache
 *     - timestamp cache entry
 * else
 *   - add entry to cache, marked In_progress
 *   - do RPC
 *   - when RPC done
 *     - if RPC# non-idempotent
 *       - mark entry Done (not In_progress)
 *       - save reply
 *       - timestamp cache entry
 *     else
 *       - free cache entry
 *     - send reply
 *
 * Later, entries with saved replies are free'd a short time (few minutes)
 * after reply sent (timestamp).
 * Reference: Chet Juszczak, "Improving the Performance and Correctness
 *   of an NFS Server", in Proc. Winter 1989 USENIX Conference,
 *   pages 53-63. San Diego, February 1989.
 * for the UDP case.
 * nfsrc_floodlevel is set to the allowable upper limit for saved replies
 * for TCP. For V3, a reply won't be saved when the flood level is
 * hit. For V4, the non-idempotent Op will return NFSERR_RESOURCE in
 * that case. This level should be set high enough that this almost
 * never happens.
 */
#ifndef APPLEKEXT
#include <fs/nfs/nfsport.h>

extern struct nfsstats newnfsstats;
extern struct mtx nfsrc_udpmtx;
extern struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0;
#endif	/* !APPLEKEXT */

SYSCTL_DECL(_vfs_nfsd);

static u_int nfsrc_tcphighwater = 0;
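/*
 * Handler for the vfs.nfsd.tcphighwater sysctl. Negative values are
 * rejected, and when the new high water mark meets or exceeds the current
 * flood level, nfsrc_floodlevel is raised to 120% of the new value so that
 * the flood level stays above the high water mark.
 */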
static int
sysctl_tcphighwater(SYSCTL_HANDLER_ARGS)
{
        int error, newhighwater;

        newhighwater = nfsrc_tcphighwater;
        error = sysctl_handle_int(oidp, &newhighwater, 0, req);
        if (error != 0 || req->newptr == NULL)
                return (error);
        if (newhighwater < 0)
                return (EINVAL);
        if (newhighwater >= nfsrc_floodlevel)
                nfsrc_floodlevel = newhighwater + newhighwater / 5;
        nfsrc_tcphighwater = newhighwater;
        return (0);
}
SYSCTL_PROC(_vfs_nfsd, OID_AUTO, tcphighwater, CTLTYPE_UINT | CTLFLAG_RW, 0,
    sizeof(nfsrc_tcphighwater), sysctl_tcphighwater, "IU",
    "High water mark for TCP cache entries");

static u_int nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER;
SYSCTL_UINT(_vfs_nfsd, OID_AUTO, udphighwater, CTLFLAG_RW,
    &nfsrc_udphighwater, 0,
    "High water mark for UDP cache entries");
static u_int nfsrc_tcptimeout = NFSRVCACHE_TCPTIMEOUT;
SYSCTL_UINT(_vfs_nfsd, OID_AUTO, tcpcachetimeo, CTLFLAG_RW,
    &nfsrc_tcptimeout, 0,
    "Timeout for TCP entries in the DRC");
static u_int nfsrc_tcpnonidempotent = 1;
SYSCTL_UINT(_vfs_nfsd, OID_AUTO, cachetcp, CTLFLAG_RW,
    &nfsrc_tcpnonidempotent, 0,
    "Enable the DRC for NFS over TCP");

static int nfsrc_udpcachesize = 0;
static TAILQ_HEAD(, nfsrvcache) nfsrvudplru;
static struct nfsrvhashhead nfsrvudphashtbl[NFSRVCACHE_HASHSIZE];

/*
 * and the reverse mapping from generic to Version 2 procedure numbers
 */
static int newnfsv2_procid[NFS_V3NPROCS] = {
        NFSV2PROC_NULL,
        NFSV2PROC_GETATTR,
        NFSV2PROC_SETATTR,
        NFSV2PROC_LOOKUP,
        NFSV2PROC_NOOP,
        NFSV2PROC_READLINK,
        NFSV2PROC_READ,
        NFSV2PROC_WRITE,
        NFSV2PROC_CREATE,
        NFSV2PROC_MKDIR,
        NFSV2PROC_SYMLINK,
        NFSV2PROC_CREATE,
        NFSV2PROC_REMOVE,
        NFSV2PROC_RMDIR,
        NFSV2PROC_RENAME,
        NFSV2PROC_LINK,
        NFSV2PROC_READDIR,
        NFSV2PROC_NOOP,
        NFSV2PROC_STATFS,
        NFSV2PROC_NOOP,
        NFSV2PROC_NOOP,
        NFSV2PROC_NOOP,
};

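/*
 * Hash an xid into one of the NFSRVCACHE_HASHSIZE buckets; the high order
 * byte of the xid is folded in before the modulus so that the upper bits
 * also influence bucket selection.
 */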
#define nfsrc_hash(xid) (((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE)
#define NFSRCUDPHASH(xid) \
        (&nfsrvudphashtbl[nfsrc_hash(xid)])
#define NFSRCHASH(xid) \
        (&nfsrchash_table[nfsrc_hash(xid)].tbl)
#define TRUE    1
#define FALSE   0
#define NFSRVCACHE_CHECKLEN     100

/* True iff the rpc reply is an nfs status ONLY! */
static int nfsv2_repstat[NFS_V3NPROCS] = {
        FALSE,
        FALSE,
        FALSE,
        FALSE,
        FALSE,
        FALSE,
        FALSE,
        FALSE,
        FALSE,
        FALSE,
        TRUE,
        TRUE,
        TRUE,
        TRUE,
        FALSE,
        TRUE,
        FALSE,
        FALSE,
        FALSE,
        FALSE,
        FALSE,
        FALSE,
};

/*
 * Will NFS want to work over IPv6 someday?
 */
#define NETFAMILY(rp) \
        (((rp)->rc_flag & RC_INETIPV6) ? AF_INET6 : AF_INET)

/* local functions */
static int nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
static int nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
static void nfsrc_lock(struct nfsrvcache *rp);
static void nfsrc_unlock(struct nfsrvcache *rp);
static void nfsrc_wanted(struct nfsrvcache *rp);
static void nfsrc_freecache(struct nfsrvcache *rp);
static void nfsrc_trimcache(u_int64_t, struct socket *);
static int nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t,
    struct socket *);
static int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum);
static void nfsrc_marksametcpconn(u_int64_t);

/*
 * Return the correct mutex for this cache entry: the single global mutex
 * for UDP entries or the per-hash-chain mutex for TCP entries.
 */
static __inline struct mtx *
nfsrc_cachemutex(struct nfsrvcache *rp)
{

        if ((rp->rc_flag & RC_UDP) != 0)
                return (&nfsrc_udpmtx);
        return (&nfsrchash_table[nfsrc_hash(rp->rc_xid)].mtx);
}

/*
 * Initialize the server request cache list
 */
APPLESTATIC void
nfsrvd_initcache(void)
{
        int i;
        static int inited = 0;

        if (inited)
                return;
        inited = 1;
        for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
                LIST_INIT(&nfsrvudphashtbl[i]);
                LIST_INIT(&nfsrchash_table[i].tbl);
        }
        TAILQ_INIT(&nfsrvudplru);
        nfsrc_tcpsavedreplies = 0;
        nfsrc_udpcachesize = 0;
        newnfsstats.srvcache_tcppeak = 0;
        newnfsstats.srvcache_size = 0;
}

/*
 * Get a cache entry for this request. Basically just malloc a new one
 * and then call nfsrc_getudp() or nfsrc_gettcp() to do the rest.
 * Call nfsrc_trimcache() to clean up the cache before returning.
 */
APPLESTATIC int
nfsrvd_getcache(struct nfsrv_descript *nd, struct socket *so)
{
        struct nfsrvcache *newrp;
        int ret;

        if (nd->nd_procnum == NFSPROC_NULL)
                panic("nfsd cache null");
        MALLOC(newrp, struct nfsrvcache *, sizeof (struct nfsrvcache),
            M_NFSRVCACHE, M_WAITOK);
        NFSBZERO((caddr_t)newrp, sizeof (struct nfsrvcache));
        if (nd->nd_flag & ND_NFSV4)
                newrp->rc_flag = RC_NFSV4;
        else if (nd->nd_flag & ND_NFSV3)
                newrp->rc_flag = RC_NFSV3;
        else
                newrp->rc_flag = RC_NFSV2;
        newrp->rc_xid = nd->nd_retxid;
        newrp->rc_proc = nd->nd_procnum;
        newrp->rc_sockref = nd->nd_sockref;
        newrp->rc_cachetime = nd->nd_tcpconntime;
        if (nd->nd_flag & ND_SAMETCPCONN)
                newrp->rc_flag |= RC_SAMETCPCONN;
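        /*
         * nd_nam2 is non-NULL only for datagram (UDP) requests, so use it
         * to choose between the UDP and TCP caches.
         */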
        if (nd->nd_nam2 != NULL) {
                newrp->rc_flag |= RC_UDP;
                ret = nfsrc_getudp(nd, newrp);
        } else {
                ret = nfsrc_gettcp(nd, newrp);
        }
        nfsrc_trimcache(nd->nd_sockref, so);
        NFSEXITCODE2(0, nd);
        return (ret);
}

/*
 * For UDP (v2, v3):
 * - key on <xid, NFS version, RPC#, Client host ip#>
 *   (at most one entry for each key)
 */
static int
nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
{
        struct nfsrvcache *rp;
        struct sockaddr_in *saddr;
        struct sockaddr_in6 *saddr6;
        struct nfsrvhashhead *hp;
        int ret = 0;
        struct mtx *mutex;

        mutex = nfsrc_cachemutex(newrp);
        hp = NFSRCUDPHASH(newrp->rc_xid);
loop:
        mtx_lock(mutex);
        LIST_FOREACH(rp, hp, rc_hash) {
                if (newrp->rc_xid == rp->rc_xid &&
                    newrp->rc_proc == rp->rc_proc &&
                    (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
                    nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
                        if ((rp->rc_flag & RC_LOCKED) != 0) {
                                rp->rc_flag |= RC_WANTED;
                                (void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
                                    "nfsrc", 10 * hz);
                                goto loop;
                        }
                        if (rp->rc_flag == 0)
                                panic("nfs udp cache0");
                        rp->rc_flag |= RC_LOCKED;
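                        /* A hit: move the entry to the tail of the UDP LRU. */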
                        TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
                        TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
                        if (rp->rc_flag & RC_INPROG) {
                                newnfsstats.srvcache_inproghits++;
                                mtx_unlock(mutex);
                                ret = RC_DROPIT;
                        } else if (rp->rc_flag & RC_REPSTATUS) {
                                /*
                                 * V2 only.
                                 */
                                newnfsstats.srvcache_nonidemdonehits++;
                                mtx_unlock(mutex);
                                nfsrvd_rephead(nd);
                                *(nd->nd_errp) = rp->rc_status;
                                ret = RC_REPLY;
                                rp->rc_timestamp = NFSD_MONOSEC +
                                    NFSRVCACHE_UDPTIMEOUT;
                        } else if (rp->rc_flag & RC_REPMBUF) {
                                newnfsstats.srvcache_nonidemdonehits++;
                                mtx_unlock(mutex);
                                nd->nd_mreq = m_copym(rp->rc_reply, 0,
                                    M_COPYALL, M_WAITOK);
                                ret = RC_REPLY;
                                rp->rc_timestamp = NFSD_MONOSEC +
                                    NFSRVCACHE_UDPTIMEOUT;
                        } else {
                                panic("nfs udp cache1");
                        }
                        nfsrc_unlock(rp);
                        free((caddr_t)newrp, M_NFSRVCACHE);
                        goto out;
                }
        }
        newnfsstats.srvcache_misses++;
        atomic_add_int(&newnfsstats.srvcache_size, 1);
        nfsrc_udpcachesize++;

        newrp->rc_flag |= RC_INPROG;
        saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
        if (saddr->sin_family == AF_INET)
                newrp->rc_inet = saddr->sin_addr.s_addr;
        else if (saddr->sin_family == AF_INET6) {
                saddr6 = (struct sockaddr_in6 *)saddr;
                NFSBCOPY((caddr_t)&saddr6->sin6_addr, (caddr_t)&newrp->rc_inet6,
                    sizeof (struct in6_addr));
                newrp->rc_flag |= RC_INETIPV6;
        }
        LIST_INSERT_HEAD(hp, newrp, rc_hash);
        TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru);
        mtx_unlock(mutex);
        nd->nd_rp = newrp;
        ret = RC_DOIT;

out:
        NFSEXITCODE2(0, nd);
        return (ret);
}

/*
 * Update a request cache entry after the rpc has been done
 */
APPLESTATIC struct nfsrvcache *
nfsrvd_updatecache(struct nfsrv_descript *nd, struct socket *so)
{
        struct nfsrvcache *rp;
        struct nfsrvcache *retrp = NULL;
        mbuf_t m;
        struct mtx *mutex;

        rp = nd->nd_rp;
        if (!rp)
                panic("nfsrvd_updatecache null rp");
        nd->nd_rp = NULL;
        mutex = nfsrc_cachemutex(rp);
        mtx_lock(mutex);
        nfsrc_lock(rp);
        if (!(rp->rc_flag & RC_INPROG))
                panic("nfsrvd_updatecache not inprog");
        rp->rc_flag &= ~RC_INPROG;
        if (rp->rc_flag & RC_UDP) {
                TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
                TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
        }

        /*
         * Reply from cache is a special case returned by nfsrv_checkseqid().
         */
        if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) {
                newnfsstats.srvcache_nonidemdonehits++;
                mtx_unlock(mutex);
                nd->nd_repstat = 0;
                if (nd->nd_mreq)
                        mbuf_freem(nd->nd_mreq);
                if (!(rp->rc_flag & RC_REPMBUF))
                        panic("reply from cache");
                nd->nd_mreq = m_copym(rp->rc_reply, 0,
                    M_COPYALL, M_WAITOK);
                rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
                nfsrc_unlock(rp);
                goto out;
        }

        /*
         * If rc_refcnt > 0, save it
         * For UDP, save it if ND_SAVEREPLY is set
         * For TCP, save it if ND_SAVEREPLY and nfsrc_tcpnonidempotent is set
         */
        if (nd->nd_repstat != NFSERR_DONTREPLY &&
            (rp->rc_refcnt > 0 ||
             ((nd->nd_flag & ND_SAVEREPLY) && (rp->rc_flag & RC_UDP)) ||
             ((nd->nd_flag & ND_SAVEREPLY) && !(rp->rc_flag & RC_UDP) &&
              nfsrc_tcpsavedreplies <= nfsrc_floodlevel &&
              nfsrc_tcpnonidempotent))) {
                if (rp->rc_refcnt > 0) {
                        if (!(rp->rc_flag & RC_NFSV4))
                                panic("update_cache refcnt");
                        rp->rc_flag |= RC_REFCNT;
                }
                if ((nd->nd_flag & ND_NFSV2) &&
                    nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) {
                        rp->rc_status = nd->nd_repstat;
                        rp->rc_flag |= RC_REPSTATUS;
                        mtx_unlock(mutex);
                } else {
                        if (!(rp->rc_flag & RC_UDP)) {
                                atomic_add_int(&nfsrc_tcpsavedreplies, 1);
                                if (nfsrc_tcpsavedreplies >
                                    newnfsstats.srvcache_tcppeak)
                                        newnfsstats.srvcache_tcppeak =
                                            nfsrc_tcpsavedreplies;
                        }
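                        /*
                         * m_copym() with M_WAITOK may sleep waiting for
                         * mbufs, so drop the mutex across the copy of the
                         * reply and reacquire it to attach the copy.
                         */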
                        mtx_unlock(mutex);
                        m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
                        mtx_lock(mutex);
                        rp->rc_reply = m;
                        rp->rc_flag |= RC_REPMBUF;
                        mtx_unlock(mutex);
                }
                if (rp->rc_flag & RC_UDP) {
                        rp->rc_timestamp = NFSD_MONOSEC +
                            NFSRVCACHE_UDPTIMEOUT;
                        nfsrc_unlock(rp);
                } else {
                        rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
                        if (rp->rc_refcnt > 0)
                                nfsrc_unlock(rp);
                        else
                                retrp = rp;
                }
        } else {
                nfsrc_freecache(rp);
                mtx_unlock(mutex);
        }

out:
        nfsrc_trimcache(nd->nd_sockref, so);
        NFSEXITCODE2(0, nd);
        return (retrp);
}

/*
 * Invalidate and, if possible, free an in-progress cache entry.
 * Must not sleep.
 */
APPLESTATIC void
nfsrvd_delcache(struct nfsrvcache *rp)
{
        struct mtx *mutex;

        mutex = nfsrc_cachemutex(rp);
        if (!(rp->rc_flag & RC_INPROG))
                panic("nfsrvd_delcache not in prog");
        mtx_lock(mutex);
        rp->rc_flag &= ~RC_INPROG;
        if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED))
                nfsrc_freecache(rp);
        mtx_unlock(mutex);
}

/*
 * Called after nfsrvd_updatecache() once the reply is sent, to update
 * the entry for nfsrc_activesocket() and unlock it. The argument is
 * the pointer returned by nfsrvd_updatecache().
 */
APPLESTATIC void
nfsrvd_sentcache(struct nfsrvcache *rp, struct socket *so, int err)
{
        tcp_seq tmp_seq;
        struct mtx *mutex;

        mutex = nfsrc_cachemutex(rp);
        if (!(rp->rc_flag & RC_LOCKED))
                panic("nfsrvd_sentcache not locked");
        if (!err) {
                if ((so->so_proto->pr_domain->dom_family != AF_INET &&
                     so->so_proto->pr_domain->dom_family != AF_INET6) ||
                    so->so_proto->pr_protocol != IPPROTO_TCP)
                        panic("nfs sent cache");
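                /*
                 * Record the TCP send sequence number of the reply so that
                 * nfsrc_activesocket() can later check whether the client
                 * has acknowledged receiving it.
                 */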
                if (nfsrv_getsockseqnum(so, &tmp_seq)) {
                        mtx_lock(mutex);
                        rp->rc_tcpseq = tmp_seq;
                        rp->rc_flag |= RC_TCPSEQ;
                        mtx_unlock(mutex);
                }
        }
        nfsrc_unlock(rp);
}

/*
 * Get a cache entry for TCP
 * - key on <xid, nfs version>
 *   (allow multiple entries for a given key)
 */
static int
nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
{
        struct nfsrvcache *rp, *nextrp;
        int i;
        struct nfsrvcache *hitrp;
        struct nfsrvhashhead *hp, nfsrc_templist;
        int hit, ret = 0;
        struct mtx *mutex;

        mutex = nfsrc_cachemutex(newrp);
        hp = NFSRCHASH(newrp->rc_xid);
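        /*
         * Compute the request length and a checksum over the first
         * NFSRVCACHE_CHECKLEN bytes of the request for the match tests
         * below.
         */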
        newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum);
tryagain:
        mtx_lock(mutex);
        hit = 1;
        LIST_INIT(&nfsrc_templist);
        /*
         * Get all the matches and put them on the temp list.
         */
        rp = LIST_FIRST(hp);
        while (rp != NULL) {
                nextrp = LIST_NEXT(rp, rc_hash);
                if (newrp->rc_xid == rp->rc_xid &&
                    (!(rp->rc_flag & RC_INPROG) ||
                     ((newrp->rc_flag & RC_SAMETCPCONN) &&
                      newrp->rc_sockref == rp->rc_sockref)) &&
                    (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
                    newrp->rc_proc == rp->rc_proc &&
                    ((newrp->rc_flag & RC_NFSV4) &&
                     newrp->rc_sockref != rp->rc_sockref &&
                     newrp->rc_cachetime >= rp->rc_cachetime)
                    && newrp->rc_reqlen == rp->rc_reqlen &&
                    newrp->rc_cksum == rp->rc_cksum) {
                        LIST_REMOVE(rp, rc_hash);
                        LIST_INSERT_HEAD(&nfsrc_templist, rp, rc_hash);
                }
                rp = nextrp;
        }

        /*
         * Now, use nfsrc_templist to decide if there is a match.
         */
        i = 0;
        LIST_FOREACH(rp, &nfsrc_templist, rc_hash) {
                i++;
                if (rp->rc_refcnt > 0) {
                        hit = 0;
                        break;
                }
        }
        /*
         * Can be a hit only if one entry left.
         * Note possible hit entry and put nfsrc_templist back on hash
         * list.
         */
        if (i != 1)
                hit = 0;
        hitrp = rp = LIST_FIRST(&nfsrc_templist);
        while (rp != NULL) {
                nextrp = LIST_NEXT(rp, rc_hash);
                LIST_REMOVE(rp, rc_hash);
                LIST_INSERT_HEAD(hp, rp, rc_hash);
                rp = nextrp;
        }
        if (LIST_FIRST(&nfsrc_templist) != NULL)
                panic("nfs gettcp cache templist");

        if (hit) {
                rp = hitrp;
                if ((rp->rc_flag & RC_LOCKED) != 0) {
                        rp->rc_flag |= RC_WANTED;
                        (void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
                            "nfsrc", 10 * hz);
                        goto tryagain;
                }
                if (rp->rc_flag == 0)
                        panic("nfs tcp cache0");
                rp->rc_flag |= RC_LOCKED;
                if (rp->rc_flag & RC_INPROG) {
                        newnfsstats.srvcache_inproghits++;
                        mtx_unlock(mutex);
                        if (newrp->rc_sockref == rp->rc_sockref)
                                nfsrc_marksametcpconn(rp->rc_sockref);
                        ret = RC_DROPIT;
                } else if (rp->rc_flag & RC_REPSTATUS) {
                        /*
                         * V2 only.
                         */
                        newnfsstats.srvcache_nonidemdonehits++;
                        mtx_unlock(mutex);
                        if (newrp->rc_sockref == rp->rc_sockref)
                                nfsrc_marksametcpconn(rp->rc_sockref);
                        ret = RC_REPLY;
                        nfsrvd_rephead(nd);
                        *(nd->nd_errp) = rp->rc_status;
                        rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
                } else if (rp->rc_flag & RC_REPMBUF) {
                        newnfsstats.srvcache_nonidemdonehits++;
                        mtx_unlock(mutex);
                        if (newrp->rc_sockref == rp->rc_sockref)
                                nfsrc_marksametcpconn(rp->rc_sockref);
                        ret = RC_REPLY;
                        nd->nd_mreq = m_copym(rp->rc_reply, 0,
                            M_COPYALL, M_WAITOK);
                        rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
                } else {
                        panic("nfs tcp cache1");
                }
                nfsrc_unlock(rp);
                free((caddr_t)newrp, M_NFSRVCACHE);
                goto out;
        }
        newnfsstats.srvcache_misses++;
        atomic_add_int(&newnfsstats.srvcache_size, 1);

        /*
         * For TCP, multiple entries for a key are allowed, so don't
         * chain it into the hash table until done.
         */
        newrp->rc_cachetime = NFSD_MONOSEC;
        newrp->rc_flag |= RC_INPROG;
        LIST_INSERT_HEAD(hp, newrp, rc_hash);
        mtx_unlock(mutex);
        nd->nd_rp = newrp;
        ret = RC_DOIT;

out:
        NFSEXITCODE2(0, nd);
        return (ret);
}

/*
 * Lock a cache entry.
 */
static void
nfsrc_lock(struct nfsrvcache *rp)
{
        struct mtx *mutex;

        mutex = nfsrc_cachemutex(rp);
        mtx_assert(mutex, MA_OWNED);
        while ((rp->rc_flag & RC_LOCKED) != 0) {
                rp->rc_flag |= RC_WANTED;
                (void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0);
        }
        rp->rc_flag |= RC_LOCKED;
}

/*
 * Unlock a cache entry.
 */
static void
nfsrc_unlock(struct nfsrvcache *rp)
{
        struct mtx *mutex;

        mutex = nfsrc_cachemutex(rp);
        mtx_lock(mutex);
        rp->rc_flag &= ~RC_LOCKED;
        nfsrc_wanted(rp);
        mtx_unlock(mutex);
}

/*
 * Wakeup anyone wanting entry.
 */
static void
nfsrc_wanted(struct nfsrvcache *rp)
{
        if (rp->rc_flag & RC_WANTED) {
                rp->rc_flag &= ~RC_WANTED;
                wakeup((caddr_t)rp);
        }
}

/*
 * Free up the entry.
 * Must not sleep.
 */
static void
nfsrc_freecache(struct nfsrvcache *rp)
{

        LIST_REMOVE(rp, rc_hash);
        if (rp->rc_flag & RC_UDP) {
                TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
                nfsrc_udpcachesize--;
        }
        nfsrc_wanted(rp);
        if (rp->rc_flag & RC_REPMBUF) {
                mbuf_freem(rp->rc_reply);
                if (!(rp->rc_flag & RC_UDP))
                        atomic_add_int(&nfsrc_tcpsavedreplies, -1);
        }
        FREE((caddr_t)rp, M_NFSRVCACHE);
        atomic_add_int(&newnfsstats.srvcache_size, -1);
}

/*
 * Clean out the cache. Called when nfsserver module is unloaded.
 */
APPLESTATIC void
nfsrvd_cleancache(void)
{
        struct nfsrvcache *rp, *nextrp;
        int i;

        for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
                mtx_lock(&nfsrchash_table[i].mtx);
                LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, nextrp)
                        nfsrc_freecache(rp);
                mtx_unlock(&nfsrchash_table[i].mtx);
        }
        mtx_lock(&nfsrc_udpmtx);
        for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
                LIST_FOREACH_SAFE(rp, &nfsrvudphashtbl[i], rc_hash, nextrp) {
                        nfsrc_freecache(rp);
                }
        }
        newnfsstats.srvcache_size = 0;
        mtx_unlock(&nfsrc_udpmtx);
        nfsrc_tcpsavedreplies = 0;
}

/*
 * Trim the cache: the basic rule is to get rid of entries that are
 * expired, with additional trimming when the UDP or TCP caches grow
 * past their high water marks.
 */
static void
nfsrc_trimcache(u_int64_t sockref, struct socket *so)
{
        struct nfsrvcache *rp, *nextrp;
        int i, j, k, time_histo[10];
        time_t thisstamp;
        static time_t udp_lasttrim = 0, tcp_lasttrim = 0;
        static int onethread = 0;
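
        /* Allow only one trimming thread at a time; any others just return. */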
        if (atomic_cmpset_acq_int(&onethread, 0, 1) == 0)
                return;
        if (NFSD_MONOSEC != udp_lasttrim ||
            nfsrc_udpcachesize >= (nfsrc_udphighwater +
            nfsrc_udphighwater / 2)) {
                mtx_lock(&nfsrc_udpmtx);
                udp_lasttrim = NFSD_MONOSEC;
                TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) {
                        if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
                             && rp->rc_refcnt == 0
                             && ((rp->rc_flag & RC_REFCNT) ||
                                 udp_lasttrim > rp->rc_timestamp ||
                                 nfsrc_udpcachesize > nfsrc_udphighwater))
                                nfsrc_freecache(rp);
                }
                mtx_unlock(&nfsrc_udpmtx);
        }
        if (NFSD_MONOSEC != tcp_lasttrim ||
            nfsrc_tcpsavedreplies >= nfsrc_tcphighwater) {
                for (i = 0; i < 10; i++)
                        time_histo[i] = 0;
                for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
                        mtx_lock(&nfsrchash_table[i].mtx);
                        if (i == 0)
                                tcp_lasttrim = NFSD_MONOSEC;
                        LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash,
                            nextrp) {
                                if (!(rp->rc_flag &
                                     (RC_INPROG|RC_LOCKED|RC_WANTED))
                                     && rp->rc_refcnt == 0) {
                                        /*
                                         * The timestamps range from roughly the
                                         * present (tcp_lasttrim) to the present
                                         * + nfsrc_tcptimeout. Generate a simple
                                         * histogram of where the timeouts fall.
                                         */
                                        j = rp->rc_timestamp - tcp_lasttrim;
                                        if (j >= nfsrc_tcptimeout)
                                                j = nfsrc_tcptimeout - 1;
                                        if (j < 0)
                                                j = 0;
                                        j = (j * 10 / nfsrc_tcptimeout) % 10;
                                        time_histo[j]++;
                                        if ((rp->rc_flag & RC_REFCNT) ||
                                            tcp_lasttrim > rp->rc_timestamp ||
                                            nfsrc_activesocket(rp, sockref, so))
                                                nfsrc_freecache(rp);
                                }
                        }
                        mtx_unlock(&nfsrchash_table[i].mtx);
                }
                j = nfsrc_tcphighwater / 5;     /* 20% of it */
                if (j > 0 && (nfsrc_tcpsavedreplies + j) > nfsrc_tcphighwater) {
                        /*
                         * Trim some more with a smaller timeout of as little
                         * as 20% of nfsrc_tcptimeout to try and get below
                         * 80% of the nfsrc_tcphighwater.
                         */
                        k = 0;
                        for (i = 0; i < 8; i++) {
                                k += time_histo[i];
                                if (k > j)
                                        break;
                        }
                        k = nfsrc_tcptimeout * (i + 1) / 10;
                        if (k < 1)
                                k = 1;
                        thisstamp = tcp_lasttrim + k;
                        for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
                                mtx_lock(&nfsrchash_table[i].mtx);
                                LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl,
                                    rc_hash, nextrp) {
                                        if (!(rp->rc_flag &
                                             (RC_INPROG|RC_LOCKED|RC_WANTED))
                                             && rp->rc_refcnt == 0
                                             && ((rp->rc_flag & RC_REFCNT) ||
                                                 thisstamp > rp->rc_timestamp ||
                                                 nfsrc_activesocket(rp, sockref,
                                                     so)))
                                                nfsrc_freecache(rp);
                                }
                                mtx_unlock(&nfsrchash_table[i].mtx);
                        }
                }
        }
        atomic_store_rel_int(&onethread, 0);
}

/*
 * Add a seqid# reference to the cache entry.
 */
APPLESTATIC void
nfsrvd_refcache(struct nfsrvcache *rp)
{
        struct mtx *mutex;

        mutex = nfsrc_cachemutex(rp);
        mtx_lock(mutex);
        if (rp->rc_refcnt < 0)
                panic("nfs cache refcnt");
        rp->rc_refcnt++;
        mtx_unlock(mutex);
}

/*
 * Dereference a seqid# cache entry.
 */
APPLESTATIC void
nfsrvd_derefcache(struct nfsrvcache *rp)
{
        struct mtx *mutex;

        mutex = nfsrc_cachemutex(rp);
        mtx_lock(mutex);
        if (rp->rc_refcnt <= 0)
                panic("nfs cache derefcnt");
        rp->rc_refcnt--;
        if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG)))
                nfsrc_freecache(rp);
        mtx_unlock(mutex);
}

/*
 * Check to see if the socket is active.
 * Return 1 if the reply has been received/acknowledged by the client,
 * 0 otherwise.
 * XXX - Uses tcp internals.
 */
static int
nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t cur_sockref,
    struct socket *cur_so)
{
        int ret = 0;

        if (!(rp->rc_flag & RC_TCPSEQ))
                return (ret);
        /*
         * If the sockref is the same, it is the same TCP connection.
         */
        if (cur_sockref == rp->rc_sockref)
                ret = nfsrv_checksockseqnum(cur_so, rp->rc_tcpseq);
        return (ret);
}

/*
 * Calculate the length of the mbuf list and a checksum on the first up to
 * NFSRVCACHE_CHECKLEN bytes.
 */
static int
nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum)
{
        int len = 0, cklen;
        mbuf_t m;

        m = m1;
        while (m) {
                len += mbuf_len(m);
                m = mbuf_next(m);
        }
        cklen = (len > NFSRVCACHE_CHECKLEN) ? NFSRVCACHE_CHECKLEN : len;
        *cksum = in_cksum(m1, cklen);
        return (len);
}

/*
 * Mark a TCP connection that is seeing retries. Should never happen for
 * NFSv4.
 */
static void
nfsrc_marksametcpconn(u_int64_t sockref)
{
}