/*	$NetBSD: nfs_nfsdcache.c,v 1.2.6.2 2014/05/22 11:41:01 yamt Exp $	*/
/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
/* __FBSDID("FreeBSD: head/sys/fs/nfsserver/nfs_nfsdcache.c 254337 2013-08-14 21:11:26Z rmacklem "); */
__RCSID("$NetBSD: nfs_nfsdcache.c,v 1.2.6.2 2014/05/22 11:41:01 yamt Exp $");

/*
 * Here is the basic algorithm:
 * First, some design criteria I used:
 * - I think a false hit is more serious than a false miss
 * - A false hit for an RPC that has Op(s) that order via seqid# must be
 *   avoided at all cost
 * - A valid hit will probably happen a long time after the original reply
 *   and the TCP socket that the original request was received on will no
 *   longer be active
 *   (The long time delay implies to me that LRU is not appropriate.)
 * - The mechanism will satisfy the requirements of ordering Ops with seqid#s
 *   in them as well as minimizing the risk of redoing retried non-idempotent
 *   Ops.
 * Because it is biased towards avoiding false hits, multiple entries with
 * the same xid are to be expected, especially for the case of the entry
 * in the cache being related to a seqid# sequenced Op.
 *
 * The basic algorithm I'm about to code up:
 * - Null RPCs bypass the cache and are just done
 * For TCP
 *	- key on <xid, NFS version> (as noted above, there can be several
 *	  entries with the same key)
 *	When a request arrives:
 *		For all that match key
 *		- if RPC# != OR request_size !=
 *			- not a match with this one
 *		- if NFSv4 and received on same TCP socket OR
 *		  received on a TCP connection created before the
 *		  entry was cached
 *			- not a match with this one
 *			(V2,3 clients might retry on same TCP socket)
 *		- calculate checksum on first N bytes of NFS XDR
 *		- if checksum !=
 *			- not a match for this one
 *		If any of the remaining ones that match has a
 *		  seqid_refcnt > 0
 *			- not a match (go do RPC, using new cache entry)
 *		If one match left
 *			- a hit (reply from cache)
 *		else
 *			- miss (go do RPC, using new cache entry)
 *
 *	During processing of NFSv4 request:
 *		- set a flag when a non-idempotent Op is processed
 *		- when an Op that uses a seqid# (Open,...) is processed
 *			- if same seqid# as referenced entry in cache
 *				- free new cache entry
 *				- reply from referenced cache entry
 *			  else if next seqid# in order
 *				- free referenced cache entry
 *				- increment seqid_refcnt on new cache entry
 *				- set pointer from Openowner/Lockowner to
 *				  new cache entry (aka reference it)
 *			  else if first seqid# in sequence
 *				- increment seqid_refcnt on new cache entry
 *				- set pointer from Openowner/Lockowner to
 *				  new cache entry (aka reference it)
 *
 *	At end of RPC processing:
 *		- if seqid_refcnt > 0 OR flagged non-idempotent on new
 *		  cache entry
 *			- save reply in cache entry
 *			- calculate checksum on first N bytes of NFS XDR
 *			  request
 *			- note op and length of XDR request (in bytes)
 *			- timestamp it
 *		  else
 *			- free new cache entry
 *		- Send reply (noting info for socket activity check, below)
 *
 *	For cache entries saved above:
 *		- if saved since seqid_refcnt was > 0
 *			- free when seqid_refcnt decrements to 0
 *			  (when next one in sequence is processed above, or
 *			   when Openowner/Lockowner is discarded)
 *		  else { non-idempotent Op(s) }
 *			- free when
 *				- some further activity observed on same
 *				  socket
 *				  (I'm not yet sure how I'm going to do
 *				   this. Maybe look at the TCP connection
 *				   to see if the send_tcp_sequence# is well
 *				   past sent reply OR K additional RPCs
 *				   replied on same socket OR?)
 *			  OR
 *				- when very old (hours, days, weeks?)
 *
 * For UDP (v2, 3 only), pretty much the old way:
 * - key on <xid, NFS version, RPC#, Client host ip#>
 *   (at most one entry for each key)
 *
 * When a Request arrives:
 *	- if a match with entry via key
 *		- if RPC marked In_progress
 *			- discard request (don't send reply)
 *		  else
 *			- reply from cache
 *			- timestamp cache entry
 *	  else
 *		- add entry to cache, marked In_progress
 *		- do RPC
 *		- when RPC done
 *			- if RPC# non-idempotent
 *				- mark entry Done (not In_progress)
 *				- save reply
 *				- timestamp cache entry
 *			  else
 *				- free cache entry
 *			- send reply
 *
 * Later, entries with saved replies are free'd a short time (few minutes)
 * after reply sent (timestamp).
 * Reference: Chet Juszczak, "Improving the Performance and Correctness
 *	of an NFS Server", in Proc. Winter 1989 USENIX Conference,
 *	pages 53-63. San Diego, February 1989.
 *	for the UDP case.
 * nfsrc_floodlevel is set to the allowable upper limit for saved replies
 *	for TCP. For V3, a reply won't be saved when the flood level is
 *	hit. For V4, the non-idempotent Op will return NFSERR_RESOURCE in
 *	that case. This level should be set high enough that this almost
 *	never happens.
 */
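/*
 * Illustrative sketch (not part of the original design notes): the way the
 * entry points below appear to be intended to fit together in the server's
 * RPC loop, judging from their comments and return values, is roughly:
 *
 *	switch (nfsrvd_getcache(nd, so)) {
 *	case RC_DROPIT:		- duplicate of a request still in progress
 *		drop the request;
 *		break;
 *	case RC_REPLY:		- reply rebuilt from the cache (nd_mreq)
 *		send the cached reply;
 *		break;
 *	case RC_DOIT:		- new entry, marked RC_INPROG
 *		perform the RPC;
 *		rp = nfsrvd_updatecache(nd, so);
 *		send the reply;
 *		if (rp != NULL)	- locked TCP entry with a saved reply
 *			nfsrvd_sentcache(rp, so, error);
 *		break;
 *	}
 *
 * nfsrvd_delcache() is used instead of nfsrvd_updatecache() when an
 * in-progress entry is simply being thrown away.
 */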
#ifndef APPLEKEXT
#include <fs/nfs/nfsport.h>

extern struct nfsstats newnfsstats;
extern struct mtx nfsrc_udpmtx;
extern struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0;
#endif	/* !APPLEKEXT */

SYSCTL_DECL(_vfs_nfsd);

static u_int	nfsrc_tcphighwater = 0;
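/*
 * Sysctl handler for vfs.nfsd.tcphighwater.  Rejects negative values and,
 * when the new high water mark reaches the current flood level, raises
 * nfsrc_floodlevel to 20% above the new mark so that saved replies are not
 * refused as soon as the high water mark is hit.
 */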
static int
sysctl_tcphighwater(SYSCTL_HANDLER_ARGS)
{
	int error, newhighwater;

	newhighwater = nfsrc_tcphighwater;
	error = sysctl_handle_int(oidp, &newhighwater, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (newhighwater < 0)
		return (EINVAL);
	if (newhighwater >= nfsrc_floodlevel)
		nfsrc_floodlevel = newhighwater + newhighwater / 5;
	nfsrc_tcphighwater = newhighwater;
	return (0);
}
SYSCTL_PROC(_vfs_nfsd, OID_AUTO, tcphighwater, CTLTYPE_UINT | CTLFLAG_RW, 0,
    sizeof(nfsrc_tcphighwater), sysctl_tcphighwater, "IU",
    "High water mark for TCP cache entries");

static u_int	nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER;
SYSCTL_UINT(_vfs_nfsd, OID_AUTO, udphighwater, CTLFLAG_RW,
    &nfsrc_udphighwater, 0,
    "High water mark for UDP cache entries");
static u_int	nfsrc_tcptimeout = NFSRVCACHE_TCPTIMEOUT;
SYSCTL_UINT(_vfs_nfsd, OID_AUTO, tcpcachetimeo, CTLFLAG_RW,
    &nfsrc_tcptimeout, 0,
    "Timeout for TCP entries in the DRC");
static u_int nfsrc_tcpnonidempotent = 1;
SYSCTL_UINT(_vfs_nfsd, OID_AUTO, cachetcp, CTLFLAG_RW,
    &nfsrc_tcpnonidempotent, 0,
    "Enable the DRC for NFS over TCP");

static int nfsrc_udpcachesize = 0;
static TAILQ_HEAD(, nfsrvcache) nfsrvudplru;
static struct nfsrvhashhead nfsrvudphashtbl[NFSRVCACHE_HASHSIZE];

/*
 * and the reverse mapping from generic to Version 2 procedure numbers
 */
static int newnfsv2_procid[NFS_V3NPROCS] = {
	NFSV2PROC_NULL,
	NFSV2PROC_GETATTR,
	NFSV2PROC_SETATTR,
	NFSV2PROC_LOOKUP,
	NFSV2PROC_NOOP,
	NFSV2PROC_READLINK,
	NFSV2PROC_READ,
	NFSV2PROC_WRITE,
	NFSV2PROC_CREATE,
	NFSV2PROC_MKDIR,
	NFSV2PROC_SYMLINK,
	NFSV2PROC_CREATE,
	NFSV2PROC_REMOVE,
	NFSV2PROC_RMDIR,
	NFSV2PROC_RENAME,
	NFSV2PROC_LINK,
	NFSV2PROC_READDIR,
	NFSV2PROC_NOOP,
	NFSV2PROC_STATFS,
	NFSV2PROC_NOOP,
	NFSV2PROC_NOOP,
	NFSV2PROC_NOOP,
};

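/*
 * Hash on the xid to pick a bucket, folding the high-order byte of the xid
 * into the low-order bits.  The UDP cache uses nfsrvudphashtbl, protected by
 * the single nfsrc_udpmtx; the TCP cache uses nfsrchash_table, which carries
 * a mutex per bucket (see nfsrc_cachemutex() below).
 */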
#define	nfsrc_hash(xid)	(((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE)
#define	NFSRCUDPHASH(xid) \
	(&nfsrvudphashtbl[nfsrc_hash(xid)])
#define	NFSRCHASH(xid) \
	(&nfsrchash_table[nfsrc_hash(xid)].tbl)
#define	TRUE	1
#define	FALSE	0
#define	NFSRVCACHE_CHECKLEN	100

/* True iff the rpc reply is an nfs status ONLY! */
static int nfsv2_repstat[NFS_V3NPROCS] = {
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	TRUE,
	TRUE,
	TRUE,
	TRUE,
	FALSE,
	TRUE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
};

/*
 * Will NFS want to work over IPv6 someday?
 */
#define	NETFAMILY(rp) \
	(((rp)->rc_flag & RC_INETIPV6) ? AF_INET6 : AF_INET)

/* local functions */
static int nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
static int nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
static void nfsrc_lock(struct nfsrvcache *rp);
static void nfsrc_unlock(struct nfsrvcache *rp);
static void nfsrc_wanted(struct nfsrvcache *rp);
static void nfsrc_freecache(struct nfsrvcache *rp);
static void nfsrc_trimcache(u_int64_t, struct socket *);
static int nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t,
    struct socket *);
static int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum);
static void nfsrc_marksametcpconn(u_int64_t);

/*
 * Return the correct mutex for this cache entry.
 */
static __inline struct mtx *
nfsrc_cachemutex(struct nfsrvcache *rp)
{

	if ((rp->rc_flag & RC_UDP) != 0)
		return (&nfsrc_udpmtx);
	return (&nfsrchash_table[nfsrc_hash(rp->rc_xid)].mtx);
}

/*
 * Initialize the server request cache list
 */
APPLESTATIC void
nfsrvd_initcache(void)
{
	int i;
	static int inited = 0;

	if (inited)
		return;
	inited = 1;
	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
		LIST_INIT(&nfsrvudphashtbl[i]);
		LIST_INIT(&nfsrchash_table[i].tbl);
	}
	TAILQ_INIT(&nfsrvudplru);
	nfsrc_tcpsavedreplies = 0;
	nfsrc_udpcachesize = 0;
	newnfsstats.srvcache_tcppeak = 0;
	newnfsstats.srvcache_size = 0;
}

/*
 * Get a cache entry for this request. Basically just malloc a new one
 * and then call nfsrc_getudp() or nfsrc_gettcp() to do the rest.
 * Call nfsrc_trimcache() to clean up the cache before returning.
 */
APPLESTATIC int
nfsrvd_getcache(struct nfsrv_descript *nd, struct socket *so)
{
	struct nfsrvcache *newrp;
	int ret;

	if (nd->nd_procnum == NFSPROC_NULL)
		panic("nfsd cache null");
	MALLOC(newrp, struct nfsrvcache *, sizeof (struct nfsrvcache),
	    M_NFSRVCACHE, M_WAITOK);
	NFSBZERO((caddr_t)newrp, sizeof (struct nfsrvcache));
	if (nd->nd_flag & ND_NFSV4)
		newrp->rc_flag = RC_NFSV4;
	else if (nd->nd_flag & ND_NFSV3)
		newrp->rc_flag = RC_NFSV3;
	else
		newrp->rc_flag = RC_NFSV2;
	newrp->rc_xid = nd->nd_retxid;
	newrp->rc_proc = nd->nd_procnum;
	newrp->rc_sockref = nd->nd_sockref;
	newrp->rc_cachetime = nd->nd_tcpconntime;
	if (nd->nd_flag & ND_SAMETCPCONN)
		newrp->rc_flag |= RC_SAMETCPCONN;
	if (nd->nd_nam2 != NULL) {
		newrp->rc_flag |= RC_UDP;
		ret = nfsrc_getudp(nd, newrp);
	} else {
		ret = nfsrc_gettcp(nd, newrp);
	}
	nfsrc_trimcache(nd->nd_sockref, so);
	NFSEXITCODE2(0, nd);
	return (ret);
}

/*
 * For UDP (v2, v3):
 * - key on <xid, NFS version, RPC#, Client host ip#>
 *   (at most one entry for each key)
 */
static int
nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
{
	struct nfsrvcache *rp;
	struct sockaddr_in *saddr;
	struct sockaddr_in6 *saddr6;
	struct nfsrvhashhead *hp;
	int ret = 0;
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(newrp);
	hp = NFSRCUDPHASH(newrp->rc_xid);
loop:
	mtx_lock(mutex);
	LIST_FOREACH(rp, hp, rc_hash) {
		if (newrp->rc_xid == rp->rc_xid &&
		    newrp->rc_proc == rp->rc_proc &&
		    (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
		    nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
			if ((rp->rc_flag & RC_LOCKED) != 0) {
				rp->rc_flag |= RC_WANTED;
				(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
				    "nfsrc", 10 * hz);
				goto loop;
			}
			if (rp->rc_flag == 0)
				panic("nfs udp cache0");
			rp->rc_flag |= RC_LOCKED;
			TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
			TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
			if (rp->rc_flag & RC_INPROG) {
				newnfsstats.srvcache_inproghits++;
				mtx_unlock(mutex);
				ret = RC_DROPIT;
			} else if (rp->rc_flag & RC_REPSTATUS) {
				/*
				 * V2 only.
				 */
				newnfsstats.srvcache_nonidemdonehits++;
				mtx_unlock(mutex);
				nfsrvd_rephead(nd);
				*(nd->nd_errp) = rp->rc_status;
				ret = RC_REPLY;
				rp->rc_timestamp = NFSD_MONOSEC +
				    NFSRVCACHE_UDPTIMEOUT;
			} else if (rp->rc_flag & RC_REPMBUF) {
				newnfsstats.srvcache_nonidemdonehits++;
				mtx_unlock(mutex);
				nd->nd_mreq = m_copym(rp->rc_reply, 0,
				    M_COPYALL, M_WAITOK);
				ret = RC_REPLY;
				rp->rc_timestamp = NFSD_MONOSEC +
				    NFSRVCACHE_UDPTIMEOUT;
			} else {
				panic("nfs udp cache1");
			}
			nfsrc_unlock(rp);
			free((caddr_t)newrp, M_NFSRVCACHE);
			goto out;
		}
	}
	newnfsstats.srvcache_misses++;
	atomic_add_int(&newnfsstats.srvcache_size, 1);
	nfsrc_udpcachesize++;

	newrp->rc_flag |= RC_INPROG;
	saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
	if (saddr->sin_family == AF_INET)
		newrp->rc_inet = saddr->sin_addr.s_addr;
	else if (saddr->sin_family == AF_INET6) {
		saddr6 = (struct sockaddr_in6 *)saddr;
		NFSBCOPY((caddr_t)&saddr6->sin6_addr, (caddr_t)&newrp->rc_inet6,
		    sizeof (struct in6_addr));
		newrp->rc_flag |= RC_INETIPV6;
	}
	LIST_INSERT_HEAD(hp, newrp, rc_hash);
	TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru);
	mtx_unlock(mutex);
	nd->nd_rp = newrp;
	ret = RC_DOIT;

out:
	NFSEXITCODE2(0, nd);
	return (ret);
}

/*
 * Update a request cache entry after the rpc has been done
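 * Returns the entry, left locked, when a reply has been saved for a TCP
 * request and the entry is not referenced by a seqid# Op; the caller hands
 * that pointer to nfsrvd_sentcache() after the reply has been sent.
 * Otherwise NULL is returned.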
 */
APPLESTATIC struct nfsrvcache *
nfsrvd_updatecache(struct nfsrv_descript *nd, struct socket *so)
{
	struct nfsrvcache *rp;
	struct nfsrvcache *retrp = NULL;
	mbuf_t m;
	struct mtx *mutex;

	rp = nd->nd_rp;
	if (!rp)
		panic("nfsrvd_updatecache null rp");
	nd->nd_rp = NULL;
	mutex = nfsrc_cachemutex(rp);
	mtx_lock(mutex);
	nfsrc_lock(rp);
	if (!(rp->rc_flag & RC_INPROG))
		panic("nfsrvd_updatecache not inprog");
	rp->rc_flag &= ~RC_INPROG;
	if (rp->rc_flag & RC_UDP) {
		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
		TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
	}

	/*
	 * Reply from cache is a special case returned by nfsrv_checkseqid().
	 */
	if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) {
		newnfsstats.srvcache_nonidemdonehits++;
		mtx_unlock(mutex);
		nd->nd_repstat = 0;
		if (nd->nd_mreq)
			mbuf_freem(nd->nd_mreq);
		if (!(rp->rc_flag & RC_REPMBUF))
			panic("reply from cache");
		nd->nd_mreq = m_copym(rp->rc_reply, 0,
		    M_COPYALL, M_WAITOK);
		rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
		nfsrc_unlock(rp);
		goto out;
	}

	/*
	 * If rc_refcnt > 0, save it
	 * For UDP, save it if ND_SAVEREPLY is set
	 * For TCP, save it if ND_SAVEREPLY and nfsrc_tcpnonidempotent is set
	 */
	if (nd->nd_repstat != NFSERR_DONTREPLY &&
	    (rp->rc_refcnt > 0 ||
	     ((nd->nd_flag & ND_SAVEREPLY) && (rp->rc_flag & RC_UDP)) ||
	     ((nd->nd_flag & ND_SAVEREPLY) && !(rp->rc_flag & RC_UDP) &&
	      nfsrc_tcpsavedreplies <= nfsrc_floodlevel &&
	      nfsrc_tcpnonidempotent))) {
		if (rp->rc_refcnt > 0) {
			if (!(rp->rc_flag & RC_NFSV4))
				panic("update_cache refcnt");
			rp->rc_flag |= RC_REFCNT;
		}
		if ((nd->nd_flag & ND_NFSV2) &&
		    nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) {
			rp->rc_status = nd->nd_repstat;
			rp->rc_flag |= RC_REPSTATUS;
			mtx_unlock(mutex);
		} else {
			if (!(rp->rc_flag & RC_UDP)) {
				atomic_add_int(&nfsrc_tcpsavedreplies, 1);
				if (nfsrc_tcpsavedreplies >
				    newnfsstats.srvcache_tcppeak)
					newnfsstats.srvcache_tcppeak =
					    nfsrc_tcpsavedreplies;
			}
			mtx_unlock(mutex);
			m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
			mtx_lock(mutex);
			rp->rc_reply = m;
			rp->rc_flag |= RC_REPMBUF;
			mtx_unlock(mutex);
		}
		if (rp->rc_flag & RC_UDP) {
			rp->rc_timestamp = NFSD_MONOSEC +
			    NFSRVCACHE_UDPTIMEOUT;
			nfsrc_unlock(rp);
		} else {
			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
			if (rp->rc_refcnt > 0)
				nfsrc_unlock(rp);
			else
				retrp = rp;
		}
	} else {
		nfsrc_freecache(rp);
		mtx_unlock(mutex);
	}

out:
	nfsrc_trimcache(nd->nd_sockref, so);
	NFSEXITCODE2(0, nd);
	return (retrp);
}

/*
 * Invalidate and, if possible, free an in prog cache entry.
 * Must not sleep.
 */
APPLESTATIC void
nfsrvd_delcache(struct nfsrvcache *rp)
{
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(rp);
	if (!(rp->rc_flag & RC_INPROG))
		panic("nfsrvd_delcache not in prog");
	mtx_lock(mutex);
	rp->rc_flag &= ~RC_INPROG;
	if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED))
		nfsrc_freecache(rp);
	mtx_unlock(mutex);
}

/*
 * Called after nfsrvd_updatecache() once the reply is sent, to update
 * the entry for nfsrc_activesocket() and unlock it. The argument is
 * the pointer returned by nfsrvd_updatecache().
 */
APPLESTATIC void
nfsrvd_sentcache(struct nfsrvcache *rp, struct socket *so, int err)
{
	tcp_seq tmp_seq;
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(rp);
	if (!(rp->rc_flag & RC_LOCKED))
		panic("nfsrvd_sentcache not locked");
	if (!err) {
		if ((so->so_proto->pr_domain->dom_family != AF_INET &&
		     so->so_proto->pr_domain->dom_family != AF_INET6) ||
		    so->so_proto->pr_protocol != IPPROTO_TCP)
			panic("nfs sent cache");
		if (nfsrv_getsockseqnum(so, &tmp_seq)) {
			mtx_lock(mutex);
			rp->rc_tcpseq = tmp_seq;
			rp->rc_flag |= RC_TCPSEQ;
			mtx_unlock(mutex);
		}
	}
	nfsrc_unlock(rp);
}

/*
 * Get a cache entry for TCP
 * - key on <xid, nfs version>
 *   (allow multiple entries for a given key)
 */
static int
nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
{
	struct nfsrvcache *rp, *nextrp;
	int i;
	struct nfsrvcache *hitrp;
	struct nfsrvhashhead *hp, nfsrc_templist;
	int hit, ret = 0;
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(newrp);
	hp = NFSRCHASH(newrp->rc_xid);
	newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum);
tryagain:
	mtx_lock(mutex);
	hit = 1;
	LIST_INIT(&nfsrc_templist);
	/*
	 * Get all the matches and put them on the temp list.
	 */
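	/*
	 * An entry matches only when the xid, NFS version, procedure number,
	 * request length and checksum all agree, subject to the additional
	 * rules for in-progress entries and NFSv4 TCP connections described
	 * in the design notes at the top of this file.
	 */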
	rp = LIST_FIRST(hp);
	while (rp != NULL) {
		nextrp = LIST_NEXT(rp, rc_hash);
		if (newrp->rc_xid == rp->rc_xid &&
		    (!(rp->rc_flag & RC_INPROG) ||
		     ((newrp->rc_flag & RC_SAMETCPCONN) &&
		      newrp->rc_sockref == rp->rc_sockref)) &&
		    (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
		    newrp->rc_proc == rp->rc_proc &&
		    ((newrp->rc_flag & RC_NFSV4) &&
		     newrp->rc_sockref != rp->rc_sockref &&
		     newrp->rc_cachetime >= rp->rc_cachetime)
		    && newrp->rc_reqlen == rp->rc_reqlen &&
		    newrp->rc_cksum == rp->rc_cksum) {
			LIST_REMOVE(rp, rc_hash);
			LIST_INSERT_HEAD(&nfsrc_templist, rp, rc_hash);
		}
		rp = nextrp;
	}

	/*
	 * Now, use nfsrc_templist to decide if there is a match.
	 */
	i = 0;
	LIST_FOREACH(rp, &nfsrc_templist, rc_hash) {
		i++;
		if (rp->rc_refcnt > 0) {
			hit = 0;
			break;
		}
	}
	/*
	 * Can be a hit only if one entry left.
	 * Note possible hit entry and put nfsrc_templist back on hash
	 * list.
	 */
	if (i != 1)
		hit = 0;
	hitrp = rp = LIST_FIRST(&nfsrc_templist);
	while (rp != NULL) {
		nextrp = LIST_NEXT(rp, rc_hash);
		LIST_REMOVE(rp, rc_hash);
		LIST_INSERT_HEAD(hp, rp, rc_hash);
		rp = nextrp;
	}
	if (LIST_FIRST(&nfsrc_templist) != NULL)
		panic("nfs gettcp cache templist");

	if (hit) {
		rp = hitrp;
		if ((rp->rc_flag & RC_LOCKED) != 0) {
			rp->rc_flag |= RC_WANTED;
			(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
			    "nfsrc", 10 * hz);
			goto tryagain;
		}
		if (rp->rc_flag == 0)
			panic("nfs tcp cache0");
		rp->rc_flag |= RC_LOCKED;
		if (rp->rc_flag & RC_INPROG) {
			newnfsstats.srvcache_inproghits++;
			mtx_unlock(mutex);
			if (newrp->rc_sockref == rp->rc_sockref)
				nfsrc_marksametcpconn(rp->rc_sockref);
			ret = RC_DROPIT;
		} else if (rp->rc_flag & RC_REPSTATUS) {
			/*
			 * V2 only.
			 */
			newnfsstats.srvcache_nonidemdonehits++;
			mtx_unlock(mutex);
			if (newrp->rc_sockref == rp->rc_sockref)
				nfsrc_marksametcpconn(rp->rc_sockref);
			ret = RC_REPLY;
			nfsrvd_rephead(nd);
			*(nd->nd_errp) = rp->rc_status;
			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
		} else if (rp->rc_flag & RC_REPMBUF) {
			newnfsstats.srvcache_nonidemdonehits++;
			mtx_unlock(mutex);
			if (newrp->rc_sockref == rp->rc_sockref)
				nfsrc_marksametcpconn(rp->rc_sockref);
			ret = RC_REPLY;
			nd->nd_mreq = m_copym(rp->rc_reply, 0,
			    M_COPYALL, M_WAITOK);
			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
		} else {
			panic("nfs tcp cache1");
		}
		nfsrc_unlock(rp);
		free((caddr_t)newrp, M_NFSRVCACHE);
		goto out;
	}
	newnfsstats.srvcache_misses++;
	atomic_add_int(&newnfsstats.srvcache_size, 1);

	/*
	 * For TCP, multiple entries for a key are allowed, so don't
	 * chain it into the hash table until done.
	 */
	newrp->rc_cachetime = NFSD_MONOSEC;
	newrp->rc_flag |= RC_INPROG;
	LIST_INSERT_HEAD(hp, newrp, rc_hash);
	mtx_unlock(mutex);
	nd->nd_rp = newrp;
	ret = RC_DOIT;

out:
	NFSEXITCODE2(0, nd);
	return (ret);
}

/*
 * Lock a cache entry.
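 * Must be called with the entry's cache mutex held (see the mtx_assert()
 * below); mtx_sleep() drops and reacquires that mutex while waiting for
 * the entry to be unlocked.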
 */
static void
nfsrc_lock(struct nfsrvcache *rp)
{
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(rp);
	mtx_assert(mutex, MA_OWNED);
	while ((rp->rc_flag & RC_LOCKED) != 0) {
		rp->rc_flag |= RC_WANTED;
		(void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0);
	}
	rp->rc_flag |= RC_LOCKED;
}

/*
 * Unlock a cache entry.
 */
static void
nfsrc_unlock(struct nfsrvcache *rp)
{
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(rp);
	mtx_lock(mutex);
	rp->rc_flag &= ~RC_LOCKED;
	nfsrc_wanted(rp);
	mtx_unlock(mutex);
}

/*
 * Wakeup anyone wanting entry.
 */
static void
nfsrc_wanted(struct nfsrvcache *rp)
{
	if (rp->rc_flag & RC_WANTED) {
		rp->rc_flag &= ~RC_WANTED;
		wakeup((caddr_t)rp);
	}
}

/*
 * Free up the entry.
 * Must not sleep.
 */
static void
nfsrc_freecache(struct nfsrvcache *rp)
{

	LIST_REMOVE(rp, rc_hash);
	if (rp->rc_flag & RC_UDP) {
		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
		nfsrc_udpcachesize--;
	}
	nfsrc_wanted(rp);
	if (rp->rc_flag & RC_REPMBUF) {
		mbuf_freem(rp->rc_reply);
		if (!(rp->rc_flag & RC_UDP))
			atomic_add_int(&nfsrc_tcpsavedreplies, -1);
	}
	FREE((caddr_t)rp, M_NFSRVCACHE);
	atomic_add_int(&newnfsstats.srvcache_size, -1);
}

/*
 * Clean out the cache. Called when nfsserver module is unloaded.
 */
APPLESTATIC void
nfsrvd_cleancache(void)
{
	struct nfsrvcache *rp, *nextrp;
	int i;

	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
		mtx_lock(&nfsrchash_table[i].mtx);
		LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, nextrp)
			nfsrc_freecache(rp);
		mtx_unlock(&nfsrchash_table[i].mtx);
	}
	mtx_lock(&nfsrc_udpmtx);
	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
		LIST_FOREACH_SAFE(rp, &nfsrvudphashtbl[i], rc_hash, nextrp) {
			nfsrc_freecache(rp);
		}
	}
	newnfsstats.srvcache_size = 0;
	mtx_unlock(&nfsrc_udpmtx);
	nfsrc_tcpsavedreplies = 0;
}

/*
 * The basic rule is to get rid of entries that are expired.
 */
static void
nfsrc_trimcache(u_int64_t sockref, struct socket *so)
{
	struct nfsrvcache *rp, *nextrp;
	int i, j, k, time_histo[10];
	time_t thisstamp;
	static time_t udp_lasttrim = 0, tcp_lasttrim = 0;
	static int onethread = 0;

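	/*
	 * Only one thread trims the cache at a time; any other caller that
	 * finds onethread already set returns immediately.  The flag is
	 * cleared with atomic_store_rel_int() at the end of the function.
	 */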
	if (atomic_cmpset_acq_int(&onethread, 0, 1) == 0)
		return;
	if (NFSD_MONOSEC != udp_lasttrim ||
	    nfsrc_udpcachesize >= (nfsrc_udphighwater +
	    nfsrc_udphighwater / 2)) {
		mtx_lock(&nfsrc_udpmtx);
		udp_lasttrim = NFSD_MONOSEC;
		TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) {
			if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
			    && rp->rc_refcnt == 0
			    && ((rp->rc_flag & RC_REFCNT) ||
				udp_lasttrim > rp->rc_timestamp ||
				nfsrc_udpcachesize > nfsrc_udphighwater))
				nfsrc_freecache(rp);
		}
		mtx_unlock(&nfsrc_udpmtx);
	}
	if (NFSD_MONOSEC != tcp_lasttrim ||
	    nfsrc_tcpsavedreplies >= nfsrc_tcphighwater) {
		for (i = 0; i < 10; i++)
			time_histo[i] = 0;
		for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
			mtx_lock(&nfsrchash_table[i].mtx);
			if (i == 0)
				tcp_lasttrim = NFSD_MONOSEC;
			LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash,
			    nextrp) {
				if (!(rp->rc_flag &
				    (RC_INPROG|RC_LOCKED|RC_WANTED))
				    && rp->rc_refcnt == 0) {
					/*
					 * The timestamps range from roughly the
					 * present (tcp_lasttrim) to the present
					 * + nfsrc_tcptimeout. Generate a simple
					 * histogram of where the timeouts fall.
					 */
					j = rp->rc_timestamp - tcp_lasttrim;
					if (j >= nfsrc_tcptimeout)
						j = nfsrc_tcptimeout - 1;
					if (j < 0)
						j = 0;
					j = (j * 10 / nfsrc_tcptimeout) % 10;
					time_histo[j]++;
					if ((rp->rc_flag & RC_REFCNT) ||
					    tcp_lasttrim > rp->rc_timestamp ||
					    nfsrc_activesocket(rp, sockref, so))
						nfsrc_freecache(rp);
				}
			}
			mtx_unlock(&nfsrchash_table[i].mtx);
		}
		j = nfsrc_tcphighwater / 5;	/* 20% of it */
		if (j > 0 && (nfsrc_tcpsavedreplies + j) > nfsrc_tcphighwater) {
			/*
			 * Trim some more with a smaller timeout of as little
			 * as 20% of nfsrc_tcptimeout to try and get below
			 * 80% of the nfsrc_tcphighwater.
			 */
			k = 0;
			for (i = 0; i < 8; i++) {
				k += time_histo[i];
				if (k > j)
					break;
			}
			k = nfsrc_tcptimeout * (i + 1) / 10;
			if (k < 1)
				k = 1;
			thisstamp = tcp_lasttrim + k;
			for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
				mtx_lock(&nfsrchash_table[i].mtx);
				LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl,
				    rc_hash, nextrp) {
					if (!(rp->rc_flag &
					    (RC_INPROG|RC_LOCKED|RC_WANTED))
					    && rp->rc_refcnt == 0
					    && ((rp->rc_flag & RC_REFCNT) ||
						thisstamp > rp->rc_timestamp ||
						nfsrc_activesocket(rp, sockref,
						    so)))
						nfsrc_freecache(rp);
				}
				mtx_unlock(&nfsrchash_table[i].mtx);
			}
		}
	}
	atomic_store_rel_int(&onethread, 0);
}

/*
 * Add a seqid# reference to the cache entry.
 */
APPLESTATIC void
nfsrvd_refcache(struct nfsrvcache *rp)
{
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(rp);
	mtx_lock(mutex);
	if (rp->rc_refcnt < 0)
		panic("nfs cache refcnt");
	rp->rc_refcnt++;
	mtx_unlock(mutex);
}

/*
 * Dereference a seqid# cache entry.
 */
APPLESTATIC void
nfsrvd_derefcache(struct nfsrvcache *rp)
{
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(rp);
	mtx_lock(mutex);
	if (rp->rc_refcnt <= 0)
		panic("nfs cache derefcnt");
	rp->rc_refcnt--;
	if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG)))
		nfsrc_freecache(rp);
	mtx_unlock(mutex);
}

/*
 * Check to see if the socket is active.
 * Return 1 if the reply has been received/acknowledged by the client,
 * 0 otherwise.
 * XXX - Uses tcp internals.
 */
static int
nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t cur_sockref,
    struct socket *cur_so)
{
	int ret = 0;

	if (!(rp->rc_flag & RC_TCPSEQ))
		return (ret);
	/*
	 * If the sockref is the same, it is the same TCP connection.
	 */
	if (cur_sockref == rp->rc_sockref)
		ret = nfsrv_checksockseqnum(cur_so, rp->rc_tcpseq);
	return (ret);
}

/*
 * Calculate the length of the mbuf list and a checksum on the first up to
 * NFSRVCACHE_CHECKLEN bytes.
 */
static int
nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum)
{
	int len = 0, cklen;
	mbuf_t m;

	m = m1;
	while (m) {
		len += mbuf_len(m);
		m = mbuf_next(m);
	}
	cklen = (len > NFSRVCACHE_CHECKLEN) ? NFSRVCACHE_CHECKLEN : len;
	*cksum = in_cksum(m1, cklen);
	return (len);
}

/*
 * Mark a TCP connection that is seeing retries. Should never happen for
 * NFSv4.
 */
static void
nfsrc_marksametcpconn(u_int64_t sockref)
{
}