nfs_nfsdcache.c revision 1.2.10.3 1 1.2.10.2 tls /* $NetBSD: nfs_nfsdcache.c,v 1.2.10.3 2017/12/03 11:38:42 jdolecek Exp $ */
2 1.2.10.2 tls /*-
3 1.2.10.2 tls * Copyright (c) 1989, 1993
4 1.2.10.2 tls * The Regents of the University of California. All rights reserved.
5 1.2.10.2 tls *
6 1.2.10.2 tls * This code is derived from software contributed to Berkeley by
7 1.2.10.2 tls * Rick Macklem at The University of Guelph.
8 1.2.10.2 tls *
9 1.2.10.2 tls * Redistribution and use in source and binary forms, with or without
10 1.2.10.2 tls * modification, are permitted provided that the following conditions
11 1.2.10.2 tls * are met:
12 1.2.10.2 tls * 1. Redistributions of source code must retain the above copyright
13 1.2.10.2 tls * notice, this list of conditions and the following disclaimer.
14 1.2.10.2 tls * 2. Redistributions in binary form must reproduce the above copyright
15 1.2.10.2 tls * notice, this list of conditions and the following disclaimer in the
16 1.2.10.2 tls * documentation and/or other materials provided with the distribution.
17 1.2.10.2 tls * 4. Neither the name of the University nor the names of its contributors
18 1.2.10.2 tls * may be used to endorse or promote products derived from this software
19 1.2.10.2 tls * without specific prior written permission.
20 1.2.10.2 tls *
21 1.2.10.2 tls * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 1.2.10.2 tls * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 1.2.10.2 tls * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 1.2.10.2 tls * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 1.2.10.2 tls * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 1.2.10.2 tls * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 1.2.10.2 tls * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 1.2.10.2 tls * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 1.2.10.2 tls * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 1.2.10.2 tls * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 1.2.10.2 tls * SUCH DAMAGE.
32 1.2.10.2 tls *
33 1.2.10.2 tls */
34 1.2.10.2 tls
35 1.2.10.2 tls #include <sys/cdefs.h>
36 1.2.10.3 jdolecek /* __FBSDID("FreeBSD: head/sys/fs/nfsserver/nfs_nfsdcache.c 304026 2016-08-12 22:44:59Z rmacklem "); */
37 1.2.10.2 tls __RCSID("$NetBSD: nfs_nfsdcache.c,v 1.2.10.3 2017/12/03 11:38:42 jdolecek Exp $");
38 1.2.10.2 tls
39 1.2.10.2 tls /*
40 1.2.10.2 tls * Here is the basic algorithm:
41 1.2.10.2 tls * First, some design criteria I used:
42 1.2.10.2 tls * - I think a false hit is more serious than a false miss
43 1.2.10.2 tls * - A false hit for an RPC that has Op(s) that order via seqid# must be
44 1.2.10.2 tls * avoided at all cost
45 1.2.10.2 tls * - A valid hit will probably happen a long time after the original reply
46 1.2.10.2 tls * and the TCP socket that the original request was received on will no
47 1.2.10.2 tls * longer be active
48 1.2.10.2 tls * (The long time delay implies to me that LRU is not appropriate.)
49 1.2.10.2 tls * - The mechanism will satisfy the requirements of ordering Ops with seqid#s
50 1.2.10.2 tls * in them as well as minimizing the risk of redoing retried non-idempotent
51 1.2.10.2 tls * Ops.
52 1.2.10.2 tls * Because it is biased towards avoiding false hits, multiple entries with
53 1.2.10.2 tls * the same xid are to be expected, especially for the case of the entry
54 1.2.10.2 tls * in the cache being related to a seqid# sequenced Op.
55 1.2.10.2 tls *
56 1.2.10.2 tls * The basic algorithm I'm about to code up:
57 1.2.10.2 tls * - Null RPCs bypass the cache and are just done
58 1.2.10.2 tls * For TCP
59 1.2.10.2 tls * - key on <xid, NFS version> (as noted above, there can be several
60 1.2.10.2 tls * entries with the same key)
61 1.2.10.2 tls * When a request arrives:
62 1.2.10.2 tls * For all that match key
63 1.2.10.2 tls * - if RPC# != OR request_size !=
64 1.2.10.2 tls * - not a match with this one
65 1.2.10.2 tls * - if NFSv4 and received on same TCP socket OR
66 1.2.10.2 tls * received on a TCP connection created before the
67 1.2.10.2 tls * entry was cached
68 1.2.10.2 tls * - not a match with this one
69 1.2.10.2 tls * (V2,3 clients might retry on same TCP socket)
70 1.2.10.2 tls * - calculate checksum on first N bytes of NFS XDR
71 1.2.10.2 tls * - if checksum !=
72 1.2.10.2 tls * - not a match for this one
73 1.2.10.2 tls * If any of the remaining ones that match has a
74 1.2.10.2 tls * seqid_refcnt > 0
75 1.2.10.2 tls * - not a match (go do RPC, using new cache entry)
76 1.2.10.2 tls * If one match left
77 1.2.10.2 tls * - a hit (reply from cache)
78 1.2.10.2 tls * else
79 1.2.10.2 tls * - miss (go do RPC, using new cache entry)
80 1.2.10.2 tls *
81 1.2.10.2 tls * During processing of NFSv4 request:
82 1.2.10.2 tls * - set a flag when a non-idempotent Op is processed
83 1.2.10.2 tls * - when an Op that uses a seqid# (Open,...) is processed
84 1.2.10.2 tls * - if same seqid# as referenced entry in cache
85 1.2.10.2 tls * - free new cache entry
86 1.2.10.2 tls * - reply from referenced cache entry
87 1.2.10.2 tls * else if next seqid# in order
88 1.2.10.2 tls * - free referenced cache entry
89 1.2.10.2 tls * - increment seqid_refcnt on new cache entry
90 1.2.10.2 tls * - set pointer from Openowner/Lockowner to
91 1.2.10.2 tls * new cache entry (aka reference it)
92 1.2.10.2 tls * else if first seqid# in sequence
93 1.2.10.2 tls * - increment seqid_refcnt on new cache entry
94 1.2.10.2 tls * - set pointer from Openowner/Lockowner to
95 1.2.10.2 tls * new cache entry (aka reference it)
96 1.2.10.2 tls *
97 1.2.10.2 tls * At end of RPC processing:
98 1.2.10.2 tls * - if seqid_refcnt > 0 OR flagged non-idempotent on new
99 1.2.10.2 tls * cache entry
100 1.2.10.2 tls * - save reply in cache entry
101 1.2.10.2 tls * - calculate checksum on first N bytes of NFS XDR
102 1.2.10.2 tls * request
103 1.2.10.2 tls * - note op and length of XDR request (in bytes)
104 1.2.10.2 tls * - timestamp it
105 1.2.10.2 tls * else
106 1.2.10.2 tls * - free new cache entry
107 1.2.10.2 tls * - Send reply (noting info for socket activity check, below)
108 1.2.10.2 tls *
109 1.2.10.2 tls * For cache entries saved above:
110 1.2.10.2 tls * - if saved since seqid_refcnt was > 0
111 1.2.10.2 tls * - free when seqid_refcnt decrements to 0
112 1.2.10.2 tls * (when next one in sequence is processed above, or
113 1.2.10.2 tls * when Openowner/Lockowner is discarded)
114 1.2.10.2 tls * else { non-idempotent Op(s) }
115 1.2.10.2 tls * - free when
116 1.2.10.2 tls * - some further activity observed on same
117 1.2.10.2 tls * socket
118 1.2.10.2 tls * (I'm not yet sure how I'm going to do
119 1.2.10.2 tls * this. Maybe look at the TCP connection
120 1.2.10.2 tls * to see if the send_tcp_sequence# is well
121 1.2.10.2 tls * past sent reply OR K additional RPCs
122 1.2.10.2 tls * replied on same socket OR?)
123 1.2.10.2 tls * OR
124 1.2.10.2 tls * - when very old (hours, days, weeks?)
125 1.2.10.2 tls *
126 1.2.10.2 tls * For UDP (v2, 3 only), pretty much the old way:
127 1.2.10.2 tls * - key on <xid, NFS version, RPC#, Client host ip#>
128 1.2.10.2 tls * (at most one entry for each key)
129 1.2.10.2 tls *
130 1.2.10.2 tls * When a Request arrives:
131 1.2.10.2 tls * - if a match with entry via key
132 1.2.10.2 tls * - if RPC marked In_progress
133 1.2.10.2 tls * - discard request (don't send reply)
134 1.2.10.2 tls * else
135 1.2.10.2 tls * - reply from cache
136 1.2.10.2 tls * - timestamp cache entry
137 1.2.10.2 tls * else
138 1.2.10.2 tls * - add entry to cache, marked In_progress
139 1.2.10.2 tls * - do RPC
140 1.2.10.2 tls * - when RPC done
141 1.2.10.2 tls * - if RPC# non-idempotent
142 1.2.10.2 tls * - mark entry Done (not In_progress)
143 1.2.10.2 tls * - save reply
144 1.2.10.2 tls * - timestamp cache entry
145 1.2.10.2 tls * else
146 1.2.10.2 tls * - free cache entry
147 1.2.10.2 tls * - send reply
148 1.2.10.2 tls *
149 1.2.10.2 tls * Later, entries with saved replies are free'd a short time (few minutes)
150 1.2.10.2 tls * after reply sent (timestamp).
151 1.2.10.2 tls * Reference: Chet Juszczak, "Improving the Performance and Correctness
152 1.2.10.2 tls * of an NFS Server", in Proc. Winter 1989 USENIX Conference,
153 1.2.10.2 tls * pages 53-63. San Diego, February 1989.
154 1.2.10.2 tls * for the UDP case.
155 1.2.10.2 tls * nfsrc_floodlevel is set to the allowable upper limit for saved replies
156 1.2.10.2 tls * for TCP. For V3, a reply won't be saved when the flood level is
157 1.2.10.2 tls * hit. For V4, the non-idempotent Op will return NFSERR_RESOURCE in
158 1.2.10.2 tls * that case. This level should be set high enough that this almost
159 1.2.10.2 tls * never happens.
160 1.2.10.2 tls */
161 1.2.10.2 tls #ifndef APPLEKEXT
162 1.2.10.3 jdolecek #include <fs/nfs/common/nfsport.h>
163 1.2.10.2 tls
/* NFS server statistics; this file updates its srvcache_* counters. */
164 1.2.10.3 jdolecek extern struct nfsstatsv1 nfsstatsv1;
/* Mutex that nfsrc_cachemutex() returns for entries flagged RC_UDP. */
165 1.2.10.2 tls extern struct mtx nfsrc_udpmtx;
/* Buckets (list head .tbl plus .mtx) indexed by nfsrc_hash(xid) for non-UDP entries. */
166 1.2.10.2 tls extern struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
/* Second bucket table; nfsrvd_sentcache() indexes it by rc_sockref and links entries via rc_ahash. */
167 1.2.10.3 jdolecek extern struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE];
/*
 * nfsrc_floodlevel is the limit nfsrc_tcpsavedreplies is compared against
 * before a TCP reply is saved; nfsrc_tcpsavedreplies is incremented each
 * time a TCP reply mbuf chain is saved.
 */
168 1.2.10.2 tls int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0;
169 1.2.10.2 tls #endif /* !APPLEKEXT */
170 1.2.10.2 tls
171 1.2.10.2 tls SYSCTL_DECL(_vfs_nfsd);
172 1.2.10.2 tls
/* Value exported through the tcphighwater sysctl registered below. */
173 1.2.10.2 tls static u_int nfsrc_tcphighwater = 0;
/*
 * Handler for the tcphighwater sysctl.
 * Copies the current value into a local int and passes it to
 * sysctl_handle_int().  Returns early with that call's result on error or
 * when req->newptr is NULL.  A negative new value is rejected with EINVAL.
 * If the new value is at or above nfsrc_floodlevel, the flood level is
 * raised to the new value plus one fifth of it; the new high water mark is
 * then stored and 0 is returned.
 * NOTE(review): newhighwater + newhighwater / 5 is plain int arithmetic and
 * looks like it can overflow for values close to INT_MAX -- confirm whether
 * an upper bound should be enforced here.
 */
174 1.2.10.2 tls static int
175 1.2.10.2 tls sysctl_tcphighwater(SYSCTL_HANDLER_ARGS)
176 1.2.10.2 tls {
177 1.2.10.2 tls int error, newhighwater;
178 1.2.10.2 tls
179 1.2.10.2 tls newhighwater = nfsrc_tcphighwater;
180 1.2.10.2 tls error = sysctl_handle_int(oidp, &newhighwater, 0, req);
181 1.2.10.2 tls if (error != 0 || req->newptr == NULL)
182 1.2.10.2 tls return (error);
183 1.2.10.2 tls if (newhighwater < 0)
184 1.2.10.2 tls return (EINVAL);
/* Keep the flood level above the high water mark. */
185 1.2.10.2 tls if (newhighwater >= nfsrc_floodlevel)
186 1.2.10.2 tls nfsrc_floodlevel = newhighwater + newhighwater / 5;
187 1.2.10.2 tls nfsrc_tcphighwater = newhighwater;
188 1.2.10.2 tls return (0);
189 1.2.10.2 tls }
190 1.2.10.2 tls SYSCTL_PROC(_vfs_nfsd, OID_AUTO, tcphighwater, CTLTYPE_UINT | CTLFLAG_RW, 0,
191 1.2.10.2 tls sizeof(nfsrc_tcphighwater), sysctl_tcphighwater, "IU",
192 1.2.10.2 tls "High water mark for TCP cache entries");
193 1.2.10.2 tls
/* Read/write tunable exported as udphighwater; defaults to NFSRVCACHE_UDPHIGHWATER. */
194 1.2.10.2 tls static u_int nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER;
195 1.2.10.2 tls SYSCTL_UINT(_vfs_nfsd, OID_AUTO, udphighwater, CTLFLAG_RW,
196 1.2.10.2 tls &nfsrc_udphighwater, 0,
197 1.2.10.2 tls "High water mark for UDP cache entries");
/* Added to NFSD_MONOSEC to form rc_timestamp for TCP cache entries. */
198 1.2.10.2 tls static u_int nfsrc_tcptimeout = NFSRVCACHE_TCPTIMEOUT;
199 1.2.10.2 tls SYSCTL_UINT(_vfs_nfsd, OID_AUTO, tcpcachetimeo, CTLFLAG_RW,
200 1.2.10.2 tls &nfsrc_tcptimeout, 0,
201 1.2.10.2 tls "Timeout for TCP entries in the DRC");
/* When zero, nfsrvd_updatecache() does not save ND_SAVEREPLY replies for TCP. */
202 1.2.10.2 tls static u_int nfsrc_tcpnonidempotent = 1;
203 1.2.10.2 tls SYSCTL_UINT(_vfs_nfsd, OID_AUTO, cachetcp, CTLFLAG_RW,
204 1.2.10.2 tls &nfsrc_tcpnonidempotent, 0,
205 1.2.10.2 tls "Enable the DRC for NFS over TCP");
206 1.2.10.2 tls
/* Incremented in nfsrc_getudp() whenever a new UDP entry is inserted. */
207 1.2.10.2 tls static int nfsrc_udpcachesize = 0;
/* UDP entries in use order; hits and updates move an entry to the tail. */
208 1.2.10.2 tls static TAILQ_HEAD(, nfsrvcache) nfsrvudplru;
/* UDP hash chains, indexed by nfsrc_hash(xid) through NFSRCUDPHASH(). */
209 1.2.10.2 tls static struct nfsrvhashhead nfsrvudphashtbl[NFSRVCACHE_HASHSIZE];
210 1.2.10.2 tls
211 1.2.10.2 tls /*
212 1.2.10.2 tls * and the reverse mapping from generic to Version 2 procedure numbers
 *
 * The table has NFS_V3NPROCS slots.  nfsrvd_updatecache() indexes it with
 * nd_procnum for ND_NFSV2 requests and uses the result to index
 * nfsv2_repstat[].  Slots holding NFSV2PROC_NOOP presumably mark
 * procedures with no Version 2 counterpart -- confirm against the
 * NFSV2PROC_* definitions.
213 1.2.10.2 tls */
214 1.2.10.2 tls static int newnfsv2_procid[NFS_V3NPROCS] = {
215 1.2.10.2 tls NFSV2PROC_NULL,
216 1.2.10.2 tls NFSV2PROC_GETATTR,
217 1.2.10.2 tls NFSV2PROC_SETATTR,
218 1.2.10.2 tls NFSV2PROC_LOOKUP,
219 1.2.10.2 tls NFSV2PROC_NOOP,
220 1.2.10.2 tls NFSV2PROC_READLINK,
221 1.2.10.2 tls NFSV2PROC_READ,
222 1.2.10.2 tls NFSV2PROC_WRITE,
223 1.2.10.2 tls NFSV2PROC_CREATE,
224 1.2.10.2 tls NFSV2PROC_MKDIR,
225 1.2.10.2 tls NFSV2PROC_SYMLINK,
226 1.2.10.2 tls NFSV2PROC_CREATE,
227 1.2.10.2 tls NFSV2PROC_REMOVE,
228 1.2.10.2 tls NFSV2PROC_RMDIR,
229 1.2.10.2 tls NFSV2PROC_RENAME,
230 1.2.10.2 tls NFSV2PROC_LINK,
231 1.2.10.2 tls NFSV2PROC_READDIR,
232 1.2.10.2 tls NFSV2PROC_NOOP,
233 1.2.10.2 tls NFSV2PROC_STATFS,
234 1.2.10.2 tls NFSV2PROC_NOOP,
235 1.2.10.2 tls NFSV2PROC_NOOP,
236 1.2.10.2 tls NFSV2PROC_NOOP,
237 1.2.10.2 tls };
238 1.2.10.2 tls
/* Bucket index for a key: the value plus its top bits (value >> 24), modulo the table size. */
239 1.2.10.2 tls #define nfsrc_hash(xid) (((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE)
/* Hash chain head for UDP entries with this xid. */
240 1.2.10.2 tls #define NFSRCUDPHASH(xid) \
241 1.2.10.2 tls (&nfsrvudphashtbl[nfsrc_hash(xid)])
/* Hash chain head (.tbl) of the nfsrchash_table bucket for this xid. */
242 1.2.10.2 tls #define NFSRCHASH(xid) \
243 1.2.10.2 tls (&nfsrchash_table[nfsrc_hash(xid)].tbl)
/* Whole nfsrcahash_table bucket; nfsrvd_sentcache() passes rc_sockref as the key. */
244 1.2.10.3 jdolecek #define NFSRCAHASH(xid) (&nfsrcahash_table[nfsrc_hash(xid)])
245 1.2.10.2 tls #define TRUE 1
246 1.2.10.2 tls #define FALSE 0
/* NOTE(review): presumably the "first N bytes" checksum length from the header comment -- confirm in nfsrc_getlenandcksum(). */
247 1.2.10.2 tls #define NFSRVCACHE_CHECKLEN 100
248 1.2.10.2 tls
249 1.2.10.2 tls /* True iff the rpc reply is an nfs status ONLY! */
/*
 * Indexed by the value taken from newnfsv2_procid[].  For a TRUE slot
 * nfsrvd_updatecache() stores only nd_repstat in rc_status and sets
 * RC_REPSTATUS instead of copying the reply mbuf chain.
 */
250 1.2.10.2 tls static int nfsv2_repstat[NFS_V3NPROCS] = {
251 1.2.10.2 tls FALSE,
252 1.2.10.2 tls FALSE,
253 1.2.10.2 tls FALSE,
254 1.2.10.2 tls FALSE,
255 1.2.10.2 tls FALSE,
256 1.2.10.2 tls FALSE,
257 1.2.10.2 tls FALSE,
258 1.2.10.2 tls FALSE,
259 1.2.10.2 tls FALSE,
260 1.2.10.2 tls FALSE,
261 1.2.10.2 tls TRUE,
262 1.2.10.2 tls TRUE,
263 1.2.10.2 tls TRUE,
264 1.2.10.2 tls TRUE,
265 1.2.10.2 tls FALSE,
266 1.2.10.2 tls TRUE,
267 1.2.10.2 tls FALSE,
268 1.2.10.2 tls FALSE,
269 1.2.10.2 tls FALSE,
270 1.2.10.2 tls FALSE,
271 1.2.10.2 tls FALSE,
272 1.2.10.2 tls FALSE,
273 1.2.10.2 tls };
274 1.2.10.2 tls
275 1.2.10.2 tls /*
276 1.2.10.2 tls * Will NFS want to work over IPv6 someday?
 *
 * Address family of a cache entry: AF_INET6 when RC_INETIPV6 is set in
 * rc_flag, AF_INET otherwise.  nfsrc_getudp() passes the result to
 * nfsaddr_match().
277 1.2.10.2 tls */
278 1.2.10.2 tls #define NETFAMILY(rp) \
279 1.2.10.2 tls (((rp)->rc_flag & RC_INETIPV6) ? AF_INET6 : AF_INET)
280 1.2.10.2 tls
281 1.2.10.2 tls /* local functions */
/* Per-transport lookup/insert helpers called from nfsrvd_getcache(). */
282 1.2.10.2 tls static int nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
283 1.2.10.2 tls static int nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
/* RC_LOCKED / RC_WANTED handling for a single cache entry. */
284 1.2.10.2 tls static void nfsrc_lock(struct nfsrvcache *rp);
285 1.2.10.2 tls static void nfsrc_unlock(struct nfsrvcache *rp);
286 1.2.10.2 tls static void nfsrc_wanted(struct nfsrvcache *rp);
287 1.2.10.2 tls static void nfsrc_freecache(struct nfsrvcache *rp);
/* Returns the request length and stores a checksum through cksum; used by nfsrc_gettcp(). */
288 1.2.10.2 tls static int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum);
289 1.2.10.2 tls static void nfsrc_marksametcpconn(u_int64_t);
290 1.2.10.2 tls
291 1.2.10.2 tls /*
292 1.2.10.2 tls * Return the correct mutex for this cache entry.
293 1.2.10.2 tls */
294 1.2.10.2 tls static __inline struct mtx *
295 1.2.10.2 tls nfsrc_cachemutex(struct nfsrvcache *rp)
296 1.2.10.2 tls {
297 1.2.10.2 tls
298 1.2.10.2 tls if ((rp->rc_flag & RC_UDP) != 0)
299 1.2.10.2 tls return (&nfsrc_udpmtx);
300 1.2.10.2 tls return (&nfsrchash_table[nfsrc_hash(rp->rc_xid)].mtx);
301 1.2.10.2 tls }
302 1.2.10.2 tls
303 1.2.10.2 tls /*
304 1.2.10.2 tls * Initialize the server request cache list
305 1.2.10.2 tls */
306 1.2.10.2 tls APPLESTATIC void
307 1.2.10.2 tls nfsrvd_initcache(void)
308 1.2.10.2 tls {
309 1.2.10.2 tls int i;
310 1.2.10.2 tls static int inited = 0;
311 1.2.10.2 tls
312 1.2.10.2 tls if (inited)
313 1.2.10.2 tls return;
314 1.2.10.2 tls inited = 1;
315 1.2.10.2 tls for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
316 1.2.10.2 tls LIST_INIT(&nfsrvudphashtbl[i]);
317 1.2.10.2 tls LIST_INIT(&nfsrchash_table[i].tbl);
318 1.2.10.3 jdolecek LIST_INIT(&nfsrcahash_table[i].tbl);
319 1.2.10.2 tls }
320 1.2.10.2 tls TAILQ_INIT(&nfsrvudplru);
321 1.2.10.2 tls nfsrc_tcpsavedreplies = 0;
322 1.2.10.2 tls nfsrc_udpcachesize = 0;
323 1.2.10.3 jdolecek nfsstatsv1.srvcache_tcppeak = 0;
324 1.2.10.3 jdolecek nfsstatsv1.srvcache_size = 0;
325 1.2.10.2 tls }
326 1.2.10.2 tls
327 1.2.10.2 tls /*
328 1.2.10.2 tls * Get a cache entry for this request. Basically just malloc a new one
329 1.2.10.2 tls * and then call nfsrc_getudp() or nfsrc_gettcp() to do the rest.
330 1.2.10.2 tls */
331 1.2.10.2 tls APPLESTATIC int
332 1.2.10.3 jdolecek nfsrvd_getcache(struct nfsrv_descript *nd)
333 1.2.10.2 tls {
334 1.2.10.2 tls struct nfsrvcache *newrp;
335 1.2.10.2 tls int ret;
336 1.2.10.2 tls
337 1.2.10.2 tls if (nd->nd_procnum == NFSPROC_NULL)
338 1.2.10.2 tls panic("nfsd cache null");
339 1.2.10.2 tls MALLOC(newrp, struct nfsrvcache *, sizeof (struct nfsrvcache),
340 1.2.10.2 tls M_NFSRVCACHE, M_WAITOK);
341 1.2.10.2 tls NFSBZERO((caddr_t)newrp, sizeof (struct nfsrvcache));
342 1.2.10.2 tls if (nd->nd_flag & ND_NFSV4)
343 1.2.10.2 tls newrp->rc_flag = RC_NFSV4;
344 1.2.10.2 tls else if (nd->nd_flag & ND_NFSV3)
345 1.2.10.2 tls newrp->rc_flag = RC_NFSV3;
346 1.2.10.2 tls else
347 1.2.10.2 tls newrp->rc_flag = RC_NFSV2;
348 1.2.10.2 tls newrp->rc_xid = nd->nd_retxid;
349 1.2.10.2 tls newrp->rc_proc = nd->nd_procnum;
350 1.2.10.2 tls newrp->rc_sockref = nd->nd_sockref;
351 1.2.10.2 tls newrp->rc_cachetime = nd->nd_tcpconntime;
352 1.2.10.2 tls if (nd->nd_flag & ND_SAMETCPCONN)
353 1.2.10.2 tls newrp->rc_flag |= RC_SAMETCPCONN;
354 1.2.10.2 tls if (nd->nd_nam2 != NULL) {
355 1.2.10.2 tls newrp->rc_flag |= RC_UDP;
356 1.2.10.2 tls ret = nfsrc_getudp(nd, newrp);
357 1.2.10.2 tls } else {
358 1.2.10.2 tls ret = nfsrc_gettcp(nd, newrp);
359 1.2.10.2 tls }
360 1.2.10.2 tls NFSEXITCODE2(0, nd);
361 1.2.10.2 tls return (ret);
362 1.2.10.2 tls }
363 1.2.10.2 tls
364 1.2.10.2 tls /*
365 1.2.10.2 tls * For UDP (v2, v3):
366 1.2.10.2 tls * - key on <xid, NFS version, RPC#, Client host ip#>
367 1.2.10.2 tls * (at most one entry for each key)
 *
 * Searches the UDP hash chain selected by newrp's xid.  On a match newrp
 * is freed and the result is RC_DROPIT (the matching request is still
 * RC_INPROG) or RC_REPLY (the cached status, or a copy of the cached reply
 * mbuf chain, is placed in nd).  On a miss newrp is flagged RC_INPROG,
 * filled in with the client address, inserted on the hash chain and at
 * the tail of nfsrvudplru, stored in nd->nd_rp, and RC_DOIT is returned.
368 1.2.10.2 tls */
369 1.2.10.2 tls static int
370 1.2.10.2 tls nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
371 1.2.10.2 tls {
372 1.2.10.2 tls struct nfsrvcache *rp;
373 1.2.10.2 tls struct sockaddr_in *saddr;
374 1.2.10.2 tls struct sockaddr_in6 *saddr6;
375 1.2.10.2 tls struct nfsrvhashhead *hp;
376 1.2.10.2 tls int ret = 0;
377 1.2.10.2 tls struct mtx *mutex;
378 1.2.10.2 tls
379 1.2.10.2 tls mutex = nfsrc_cachemutex(newrp);
380 1.2.10.2 tls hp = NFSRCUDPHASH(newrp->rc_xid);
/* Restart point: the scan begins again after sleeping on a locked match. */
381 1.2.10.2 tls loop:
382 1.2.10.2 tls mtx_lock(mutex);
383 1.2.10.2 tls LIST_FOREACH(rp, hp, rc_hash) {
/* Match on xid, procedure, a shared NFS version bit and the client address. */
384 1.2.10.2 tls if (newrp->rc_xid == rp->rc_xid &&
385 1.2.10.2 tls newrp->rc_proc == rp->rc_proc &&
386 1.2.10.2 tls (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
387 1.2.10.2 tls nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
388 1.2.10.2 tls if ((rp->rc_flag & RC_LOCKED) != 0) {
/* Entry is busy: sleep on it (timeout 10 * hz), then rescan; the mutex is re-taken at loop:. */
389 1.2.10.2 tls rp->rc_flag |= RC_WANTED;
390 1.2.10.2 tls (void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
391 1.2.10.2 tls "nfsrc", 10 * hz);
392 1.2.10.2 tls goto loop;
393 1.2.10.2 tls }
394 1.2.10.2 tls if (rp->rc_flag == 0)
395 1.2.10.2 tls panic("nfs udp cache0");
/* Lock the entry and move it to the tail of the LRU queue. */
396 1.2.10.2 tls rp->rc_flag |= RC_LOCKED;
397 1.2.10.2 tls TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
398 1.2.10.2 tls TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
/* In every branch the mutex is dropped while the entry stays RC_LOCKED. */
399 1.2.10.2 tls if (rp->rc_flag & RC_INPROG) {
400 1.2.10.3 jdolecek nfsstatsv1.srvcache_inproghits++;
401 1.2.10.2 tls mtx_unlock(mutex);
402 1.2.10.2 tls ret = RC_DROPIT;
403 1.2.10.2 tls } else if (rp->rc_flag & RC_REPSTATUS) {
404 1.2.10.2 tls /*
405 1.2.10.2 tls * V2 only.
406 1.2.10.2 tls */
407 1.2.10.3 jdolecek nfsstatsv1.srvcache_nonidemdonehits++;
408 1.2.10.2 tls mtx_unlock(mutex);
409 1.2.10.2 tls nfsrvd_rephead(nd);
410 1.2.10.2 tls *(nd->nd_errp) = rp->rc_status;
411 1.2.10.2 tls ret = RC_REPLY;
412 1.2.10.2 tls rp->rc_timestamp = NFSD_MONOSEC +
413 1.2.10.2 tls NFSRVCACHE_UDPTIMEOUT;
414 1.2.10.2 tls } else if (rp->rc_flag & RC_REPMBUF) {
415 1.2.10.3 jdolecek nfsstatsv1.srvcache_nonidemdonehits++;
416 1.2.10.2 tls mtx_unlock(mutex);
/* Reply with a copy so the cached mbuf chain stays in the entry. */
417 1.2.10.2 tls nd->nd_mreq = m_copym(rp->rc_reply, 0,
418 1.2.10.2 tls M_COPYALL, M_WAITOK);
419 1.2.10.2 tls ret = RC_REPLY;
420 1.2.10.2 tls rp->rc_timestamp = NFSD_MONOSEC +
421 1.2.10.2 tls NFSRVCACHE_UDPTIMEOUT;
422 1.2.10.2 tls } else {
423 1.2.10.2 tls panic("nfs udp cache1");
424 1.2.10.2 tls }
/* Hit handled: release the entry lock and discard the unused new entry. */
425 1.2.10.2 tls nfsrc_unlock(rp);
426 1.2.10.2 tls free((caddr_t)newrp, M_NFSRVCACHE);
427 1.2.10.2 tls goto out;
428 1.2.10.2 tls }
429 1.2.10.2 tls }
/* Miss: account for it and insert newrp, still holding the mutex. */
430 1.2.10.3 jdolecek nfsstatsv1.srvcache_misses++;
431 1.2.10.3 jdolecek atomic_add_int(&nfsstatsv1.srvcache_size, 1);
432 1.2.10.2 tls nfsrc_udpcachesize++;
433 1.2.10.2 tls
434 1.2.10.2 tls newrp->rc_flag |= RC_INPROG;
/* Record the client address; for any other address family none is stored. */
435 1.2.10.2 tls saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
436 1.2.10.2 tls if (saddr->sin_family == AF_INET)
437 1.2.10.2 tls newrp->rc_inet = saddr->sin_addr.s_addr;
438 1.2.10.2 tls else if (saddr->sin_family == AF_INET6) {
439 1.2.10.2 tls saddr6 = (struct sockaddr_in6 *)saddr;
440 1.2.10.2 tls NFSBCOPY((caddr_t)&saddr6->sin6_addr, (caddr_t)&newrp->rc_inet6,
441 1.2.10.2 tls sizeof (struct in6_addr));
442 1.2.10.2 tls newrp->rc_flag |= RC_INETIPV6;
443 1.2.10.2 tls }
444 1.2.10.2 tls LIST_INSERT_HEAD(hp, newrp, rc_hash);
445 1.2.10.2 tls TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru);
446 1.2.10.2 tls mtx_unlock(mutex);
447 1.2.10.2 tls nd->nd_rp = newrp;
448 1.2.10.2 tls ret = RC_DOIT;
449 1.2.10.2 tls
450 1.2.10.2 tls out:
451 1.2.10.2 tls NFSEXITCODE2(0, nd);
452 1.2.10.2 tls return (ret);
453 1.2.10.2 tls }
454 1.2.10.2 tls
455 1.2.10.2 tls /*
456 1.2.10.2 tls * Update a request cache entry after the rpc has been done
 *
 * Takes the entry from nd->nd_rp (panics if it is NULL or not RC_INPROG),
 * clears nd->nd_rp and RC_INPROG, and then either replies from the cached
 * mbuf chain (NFSERR_REPLYFROMCACHE), saves the reply in the entry, or
 * frees the entry.
 * Returns the entry, still RC_LOCKED, only when a TCP reply was saved and
 * rc_refcnt is 0; otherwise returns NULL.
457 1.2.10.2 tls */
458 1.2.10.2 tls APPLESTATIC struct nfsrvcache *
459 1.2.10.3 jdolecek nfsrvd_updatecache(struct nfsrv_descript *nd)
460 1.2.10.2 tls {
461 1.2.10.2 tls struct nfsrvcache *rp;
462 1.2.10.2 tls struct nfsrvcache *retrp = NULL;
463 1.2.10.2 tls mbuf_t m;
464 1.2.10.2 tls struct mtx *mutex;
465 1.2.10.2 tls
466 1.2.10.2 tls rp = nd->nd_rp;
467 1.2.10.2 tls if (!rp)
468 1.2.10.2 tls panic("nfsrvd_updatecache null rp");
469 1.2.10.2 tls nd->nd_rp = NULL;
470 1.2.10.2 tls mutex = nfsrc_cachemutex(rp);
471 1.2.10.2 tls mtx_lock(mutex);
472 1.2.10.2 tls nfsrc_lock(rp);
473 1.2.10.2 tls if (!(rp->rc_flag & RC_INPROG))
474 1.2.10.2 tls panic("nfsrvd_updatecache not inprog");
475 1.2.10.2 tls rp->rc_flag &= ~RC_INPROG;
/* UDP entries are moved to the tail of the LRU queue. */
476 1.2.10.2 tls if (rp->rc_flag & RC_UDP) {
477 1.2.10.2 tls TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
478 1.2.10.2 tls TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
479 1.2.10.2 tls }
480 1.2.10.2 tls
481 1.2.10.2 tls /*
482 1.2.10.2 tls * Reply from cache is a special case returned by nfsrv_checkseqid().
 * Any reply already built in nd_mreq is freed and replaced by a copy
 * of the entry's saved reply.
483 1.2.10.2 tls */
484 1.2.10.2 tls if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) {
485 1.2.10.3 jdolecek nfsstatsv1.srvcache_nonidemdonehits++;
486 1.2.10.2 tls mtx_unlock(mutex);
487 1.2.10.2 tls nd->nd_repstat = 0;
488 1.2.10.2 tls if (nd->nd_mreq)
489 1.2.10.2 tls mbuf_freem(nd->nd_mreq);
490 1.2.10.2 tls if (!(rp->rc_flag & RC_REPMBUF))
491 1.2.10.2 tls panic("reply from cache");
492 1.2.10.2 tls nd->nd_mreq = m_copym(rp->rc_reply, 0,
493 1.2.10.2 tls M_COPYALL, M_WAITOK);
494 1.2.10.2 tls rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
495 1.2.10.2 tls nfsrc_unlock(rp);
496 1.2.10.2 tls goto out;
497 1.2.10.2 tls }
498 1.2.10.2 tls
499 1.2.10.2 tls /*
500 1.2.10.2 tls * If rc_refcnt > 0, save it
501 1.2.10.2 tls * For UDP, save it if ND_SAVEREPLY is set
502 1.2.10.2 tls * For TCP, save it if ND_SAVEREPLY and nfsrc_tcpnonidempotent is set
 * (the TCP case also requires nfsrc_tcpsavedreplies to be at or below
 * nfsrc_floodlevel).  Nothing is saved for NFSERR_DONTREPLY.
503 1.2.10.2 tls */
504 1.2.10.2 tls if (nd->nd_repstat != NFSERR_DONTREPLY &&
505 1.2.10.2 tls (rp->rc_refcnt > 0 ||
506 1.2.10.2 tls ((nd->nd_flag & ND_SAVEREPLY) && (rp->rc_flag & RC_UDP)) ||
507 1.2.10.2 tls ((nd->nd_flag & ND_SAVEREPLY) && !(rp->rc_flag & RC_UDP) &&
508 1.2.10.2 tls nfsrc_tcpsavedreplies <= nfsrc_floodlevel &&
509 1.2.10.2 tls nfsrc_tcpnonidempotent))) {
510 1.2.10.2 tls if (rp->rc_refcnt > 0) {
511 1.2.10.2 tls if (!(rp->rc_flag & RC_NFSV4))
512 1.2.10.2 tls panic("update_cache refcnt");
513 1.2.10.2 tls rp->rc_flag |= RC_REFCNT;
514 1.2.10.2 tls }
/* V2 status-only replies are cached as just the status value. */
515 1.2.10.2 tls if ((nd->nd_flag & ND_NFSV2) &&
516 1.2.10.2 tls nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) {
517 1.2.10.2 tls rp->rc_status = nd->nd_repstat;
518 1.2.10.2 tls rp->rc_flag |= RC_REPSTATUS;
519 1.2.10.2 tls mtx_unlock(mutex);
520 1.2.10.2 tls } else {
/* Count the saved TCP reply and track the peak in the statistics. */
521 1.2.10.2 tls if (!(rp->rc_flag & RC_UDP)) {
522 1.2.10.2 tls atomic_add_int(&nfsrc_tcpsavedreplies, 1);
523 1.2.10.2 tls if (nfsrc_tcpsavedreplies >
524 1.2.10.3 jdolecek nfsstatsv1.srvcache_tcppeak)
525 1.2.10.3 jdolecek nfsstatsv1.srvcache_tcppeak =
526 1.2.10.2 tls nfsrc_tcpsavedreplies;
527 1.2.10.2 tls }
/* The mutex is dropped around the M_WAITOK copy and re-taken to publish it. */
528 1.2.10.2 tls mtx_unlock(mutex);
529 1.2.10.2 tls m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
530 1.2.10.2 tls mtx_lock(mutex);
531 1.2.10.2 tls rp->rc_reply = m;
532 1.2.10.2 tls rp->rc_flag |= RC_REPMBUF;
533 1.2.10.2 tls mtx_unlock(mutex);
534 1.2.10.2 tls }
/* Stamp the entry; only a TCP entry with rc_refcnt == 0 is returned locked. */
535 1.2.10.2 tls if (rp->rc_flag & RC_UDP) {
536 1.2.10.2 tls rp->rc_timestamp = NFSD_MONOSEC +
537 1.2.10.2 tls NFSRVCACHE_UDPTIMEOUT;
538 1.2.10.2 tls nfsrc_unlock(rp);
539 1.2.10.2 tls } else {
540 1.2.10.2 tls rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
541 1.2.10.2 tls if (rp->rc_refcnt > 0)
542 1.2.10.2 tls nfsrc_unlock(rp);
543 1.2.10.2 tls else
544 1.2.10.2 tls retrp = rp;
545 1.2.10.2 tls }
546 1.2.10.2 tls } else {
/* Reply is not being saved: release the entry via nfsrc_freecache(). */
547 1.2.10.2 tls nfsrc_freecache(rp);
548 1.2.10.2 tls mtx_unlock(mutex);
549 1.2.10.2 tls }
550 1.2.10.2 tls
551 1.2.10.2 tls out:
552 1.2.10.2 tls NFSEXITCODE2(0, nd);
553 1.2.10.2 tls return (retrp);
554 1.2.10.2 tls }
555 1.2.10.2 tls
556 1.2.10.2 tls /*
557 1.2.10.2 tls * Invalidate and, if possible, free an in prog cache entry.
558 1.2.10.2 tls * Must not sleep.
559 1.2.10.2 tls */
560 1.2.10.2 tls APPLESTATIC void
561 1.2.10.2 tls nfsrvd_delcache(struct nfsrvcache *rp)
562 1.2.10.2 tls {
563 1.2.10.2 tls struct mtx *mutex;
564 1.2.10.2 tls
565 1.2.10.2 tls mutex = nfsrc_cachemutex(rp);
566 1.2.10.2 tls if (!(rp->rc_flag & RC_INPROG))
567 1.2.10.2 tls panic("nfsrvd_delcache not in prog");
568 1.2.10.2 tls mtx_lock(mutex);
569 1.2.10.2 tls rp->rc_flag &= ~RC_INPROG;
570 1.2.10.2 tls if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED))
571 1.2.10.2 tls nfsrc_freecache(rp);
572 1.2.10.2 tls mtx_unlock(mutex);
573 1.2.10.2 tls }
574 1.2.10.2 tls
575 1.2.10.2 tls /*
576 1.2.10.2 tls * Called after nfsrvd_updatecache() once the reply is sent, to update
577 1.2.10.3 jdolecek * the entry's sequence number and unlock it. The argument is
578 1.2.10.2 tls * the pointer returned by nfsrvd_updatecache().
579 1.2.10.2 tls */
580 1.2.10.2 tls APPLESTATIC void
581 1.2.10.3 jdolecek nfsrvd_sentcache(struct nfsrvcache *rp, int have_seq, uint32_t seq)
582 1.2.10.2 tls {
583 1.2.10.3 jdolecek struct nfsrchash_bucket *hbp;
584 1.2.10.2 tls
585 1.2.10.3 jdolecek KASSERT(rp->rc_flag & RC_LOCKED, ("nfsrvd_sentcache not locked"));
586 1.2.10.3 jdolecek if (have_seq) {
587 1.2.10.3 jdolecek hbp = NFSRCAHASH(rp->rc_sockref);
588 1.2.10.3 jdolecek mtx_lock(&hbp->mtx);
589 1.2.10.3 jdolecek rp->rc_tcpseq = seq;
590 1.2.10.3 jdolecek if (rp->rc_acked != RC_NO_ACK)
591 1.2.10.3 jdolecek LIST_INSERT_HEAD(&hbp->tbl, rp, rc_ahash);
592 1.2.10.3 jdolecek rp->rc_acked = RC_NO_ACK;
593 1.2.10.3 jdolecek mtx_unlock(&hbp->mtx);
594 1.2.10.2 tls }
595 1.2.10.2 tls nfsrc_unlock(rp);
596 1.2.10.2 tls }
597 1.2.10.2 tls
598 1.2.10.2 tls /*
599 1.2.10.2 tls * Get a cache entry for TCP
600 1.2.10.2 tls * - key on <xid, nfs version>
601 1.2.10.2 tls * (allow multiple entries for a given key)
 *
 * Collects the candidate entries for newrp on a temporary list, decides
 * whether exactly one of them is a hit, and puts them back on the hash
 * chain.  On a hit newrp is freed and the result is RC_DROPIT (matching
 * request still RC_INPROG) or RC_REPLY (cached status, or a copy of the
 * cached reply mbuf chain, is placed in nd).  On a miss newrp is flagged
 * RC_INPROG, inserted on the hash chain, stored in nd->nd_rp, and RC_DOIT
 * is returned.
602 1.2.10.2 tls */
603 1.2.10.2 tls static int
604 1.2.10.2 tls nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
605 1.2.10.2 tls {
606 1.2.10.2 tls struct nfsrvcache *rp, *nextrp;
607 1.2.10.2 tls int i;
608 1.2.10.2 tls struct nfsrvcache *hitrp;
609 1.2.10.2 tls struct nfsrvhashhead *hp, nfsrc_templist;
610 1.2.10.2 tls int hit, ret = 0;
611 1.2.10.2 tls struct mtx *mutex;
612 1.2.10.2 tls
613 1.2.10.2 tls mutex = nfsrc_cachemutex(newrp);
614 1.2.10.2 tls hp = NFSRCHASH(newrp->rc_xid);
/* Request length and checksum are computed once, before the mutex is taken. */
615 1.2.10.2 tls newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum);
/* Restart point: the whole search is redone after sleeping on a locked hit. */
616 1.2.10.2 tls tryagain:
617 1.2.10.2 tls mtx_lock(mutex);
618 1.2.10.2 tls hit = 1;
619 1.2.10.2 tls LIST_INIT(&nfsrc_templist);
620 1.2.10.2 tls /*
621 1.2.10.2 tls * Get all the matches and put them on the temp list.
 * A candidate must have the same xid, procedure, request length and
 * checksum and share an NFS version bit; an RC_INPROG entry is only
 * considered when the new request is RC_SAMETCPCONN on the same
 * rc_sockref.
 * NOTE(review): as written, the RC_NFSV4 clause is ANDed into the
 * test, so a non-V4 request can never match, and it also requires a
 * different rc_sockref -- confirm this is the intended reading of
 * the design notes at the top of the file.
622 1.2.10.2 tls */
623 1.2.10.2 tls rp = LIST_FIRST(hp);
624 1.2.10.2 tls while (rp != NULL) {
/* Fetch the successor first because rp may be unlinked below. */
625 1.2.10.2 tls nextrp = LIST_NEXT(rp, rc_hash);
626 1.2.10.2 tls if (newrp->rc_xid == rp->rc_xid &&
627 1.2.10.2 tls (!(rp->rc_flag & RC_INPROG) ||
628 1.2.10.2 tls ((newrp->rc_flag & RC_SAMETCPCONN) &&
629 1.2.10.2 tls newrp->rc_sockref == rp->rc_sockref)) &&
630 1.2.10.2 tls (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
631 1.2.10.2 tls newrp->rc_proc == rp->rc_proc &&
632 1.2.10.2 tls ((newrp->rc_flag & RC_NFSV4) &&
633 1.2.10.2 tls newrp->rc_sockref != rp->rc_sockref &&
634 1.2.10.2 tls newrp->rc_cachetime >= rp->rc_cachetime)
635 1.2.10.2 tls && newrp->rc_reqlen == rp->rc_reqlen &&
636 1.2.10.2 tls newrp->rc_cksum == rp->rc_cksum) {
637 1.2.10.2 tls LIST_REMOVE(rp, rc_hash);
638 1.2.10.2 tls LIST_INSERT_HEAD(&nfsrc_templist, rp, rc_hash);
639 1.2.10.2 tls }
640 1.2.10.2 tls rp = nextrp;
641 1.2.10.2 tls }
642 1.2.10.2 tls
643 1.2.10.2 tls /*
644 1.2.10.2 tls * Now, use nfsrc_templist to decide if there is a match.
 * Any candidate with rc_refcnt > 0 rules out a hit; the count stops
 * at that candidate.
645 1.2.10.2 tls */
646 1.2.10.2 tls i = 0;
647 1.2.10.2 tls LIST_FOREACH(rp, &nfsrc_templist, rc_hash) {
648 1.2.10.2 tls i++;
649 1.2.10.2 tls if (rp->rc_refcnt > 0) {
650 1.2.10.2 tls hit = 0;
651 1.2.10.2 tls break;
652 1.2.10.2 tls }
653 1.2.10.2 tls }
654 1.2.10.2 tls /*
655 1.2.10.2 tls * Can be a hit only if one entry left.
656 1.2.10.2 tls * Note possible hit entry and put nfsrc_templist back on hash
657 1.2.10.2 tls * list.
658 1.2.10.2 tls */
659 1.2.10.2 tls if (i != 1)
660 1.2.10.2 tls hit = 0;
/* hitrp is the first candidate; it is only used when hit is still set. */
661 1.2.10.2 tls hitrp = rp = LIST_FIRST(&nfsrc_templist);
662 1.2.10.2 tls while (rp != NULL) {
663 1.2.10.2 tls nextrp = LIST_NEXT(rp, rc_hash);
664 1.2.10.2 tls LIST_REMOVE(rp, rc_hash);
665 1.2.10.2 tls LIST_INSERT_HEAD(hp, rp, rc_hash);
666 1.2.10.2 tls rp = nextrp;
667 1.2.10.2 tls }
668 1.2.10.2 tls if (LIST_FIRST(&nfsrc_templist) != NULL)
669 1.2.10.2 tls panic("nfs gettcp cache templist");
670 1.2.10.2 tls
671 1.2.10.2 tls if (hit) {
672 1.2.10.2 tls rp = hitrp;
673 1.2.10.2 tls if ((rp->rc_flag & RC_LOCKED) != 0) {
/* Entry is busy: sleep on it (timeout 10 * hz), then redo the search; the mutex is re-taken at tryagain:. */
674 1.2.10.2 tls rp->rc_flag |= RC_WANTED;
675 1.2.10.2 tls (void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
676 1.2.10.2 tls "nfsrc", 10 * hz);
677 1.2.10.2 tls goto tryagain;
678 1.2.10.2 tls }
679 1.2.10.2 tls if (rp->rc_flag == 0)
680 1.2.10.2 tls panic("nfs tcp cache0");
/* In every branch below the mutex is dropped while the entry stays RC_LOCKED. */
681 1.2.10.2 tls rp->rc_flag |= RC_LOCKED;
682 1.2.10.2 tls if (rp->rc_flag & RC_INPROG) {
683 1.2.10.3 jdolecek nfsstatsv1.srvcache_inproghits++;
684 1.2.10.2 tls mtx_unlock(mutex);
685 1.2.10.2 tls if (newrp->rc_sockref == rp->rc_sockref)
686 1.2.10.2 tls nfsrc_marksametcpconn(rp->rc_sockref);
687 1.2.10.2 tls ret = RC_DROPIT;
688 1.2.10.2 tls } else if (rp->rc_flag & RC_REPSTATUS) {
689 1.2.10.2 tls /*
690 1.2.10.2 tls * V2 only.
691 1.2.10.2 tls */
692 1.2.10.3 jdolecek nfsstatsv1.srvcache_nonidemdonehits++;
693 1.2.10.2 tls mtx_unlock(mutex);
694 1.2.10.2 tls if (newrp->rc_sockref == rp->rc_sockref)
695 1.2.10.2 tls nfsrc_marksametcpconn(rp->rc_sockref);
696 1.2.10.2 tls ret = RC_REPLY;
697 1.2.10.2 tls nfsrvd_rephead(nd);
698 1.2.10.2 tls *(nd->nd_errp) = rp->rc_status;
699 1.2.10.2 tls rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
700 1.2.10.2 tls } else if (rp->rc_flag & RC_REPMBUF) {
701 1.2.10.3 jdolecek nfsstatsv1.srvcache_nonidemdonehits++;
702 1.2.10.2 tls mtx_unlock(mutex);
703 1.2.10.2 tls if (newrp->rc_sockref == rp->rc_sockref)
704 1.2.10.2 tls nfsrc_marksametcpconn(rp->rc_sockref);
705 1.2.10.2 tls ret = RC_REPLY;
/* Reply with a copy so the cached mbuf chain stays in the entry. */
706 1.2.10.2 tls nd->nd_mreq = m_copym(rp->rc_reply, 0,
707 1.2.10.2 tls M_COPYALL, M_WAITOK);
708 1.2.10.2 tls rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
709 1.2.10.2 tls } else {
710 1.2.10.2 tls panic("nfs tcp cache1");
711 1.2.10.2 tls }
/* Hit handled: release the entry lock and discard the unused new entry. */
712 1.2.10.2 tls nfsrc_unlock(rp);
713 1.2.10.2 tls free((caddr_t)newrp, M_NFSRVCACHE);
714 1.2.10.2 tls goto out;
715 1.2.10.2 tls }
/* Miss: the mutex is still held here. */
716 1.2.10.3 jdolecek nfsstatsv1.srvcache_misses++;
717 1.2.10.3 jdolecek atomic_add_int(&nfsstatsv1.srvcache_size, 1);
718 1.2.10.2 tls
719 1.2.10.2 tls /*
720 1.2.10.2 tls * For TCP, multiple entries for a key are allowed, so don't
721 1.2.10.2 tls * chain it into the hash table until done.
 * NOTE(review): the code below does insert the entry on the hash
 * chain right away, flagged RC_INPROG -- this comment looks stale.
 * rc_cachetime, set from nd_tcpconntime by the caller, is replaced
 * with the current NFSD_MONOSEC here.
722 1.2.10.2 tls */
723 1.2.10.2 tls newrp->rc_cachetime = NFSD_MONOSEC;
724 1.2.10.2 tls newrp->rc_flag |= RC_INPROG;
725 1.2.10.2 tls LIST_INSERT_HEAD(hp, newrp, rc_hash);
726 1.2.10.2 tls mtx_unlock(mutex);
727 1.2.10.2 tls nd->nd_rp = newrp;
728 1.2.10.2 tls ret = RC_DOIT;
729 1.2.10.2 tls
730 1.2.10.2 tls out:
731 1.2.10.2 tls NFSEXITCODE2(0, nd);
732 1.2.10.2 tls return (ret);
733 1.2.10.2 tls }
734 1.2.10.2 tls
735 1.2.10.2 tls /*
736 1.2.10.2 tls * Lock a cache entry.
737 1.2.10.2 tls */
738 1.2.10.2 tls static void
739 1.2.10.2 tls nfsrc_lock(struct nfsrvcache *rp)
740 1.2.10.2 tls {
741 1.2.10.2 tls struct mtx *mutex;
742 1.2.10.2 tls
743 1.2.10.2 tls mutex = nfsrc_cachemutex(rp);
744 1.2.10.2 tls mtx_assert(mutex, MA_OWNED);
745 1.2.10.2 tls while ((rp->rc_flag & RC_LOCKED) != 0) {
746 1.2.10.2 tls rp->rc_flag |= RC_WANTED;
747 1.2.10.2 tls (void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0);
748 1.2.10.2 tls }
749 1.2.10.2 tls rp->rc_flag |= RC_LOCKED;
750 1.2.10.2 tls }
751 1.2.10.2 tls
752 1.2.10.2 tls /*
753 1.2.10.2 tls * Unlock a cache entry.
754 1.2.10.2 tls */
755 1.2.10.2 tls static void
756 1.2.10.2 tls nfsrc_unlock(struct nfsrvcache *rp)
757 1.2.10.2 tls {
758 1.2.10.2 tls struct mtx *mutex;
759 1.2.10.2 tls
760 1.2.10.2 tls mutex = nfsrc_cachemutex(rp);
761 1.2.10.2 tls mtx_lock(mutex);
762 1.2.10.2 tls rp->rc_flag &= ~RC_LOCKED;
763 1.2.10.2 tls nfsrc_wanted(rp);
764 1.2.10.2 tls mtx_unlock(mutex);
765 1.2.10.2 tls }
766 1.2.10.2 tls
767 1.2.10.2 tls /*
768 1.2.10.2 tls * Wakeup anyone wanting entry.
769 1.2.10.2 tls */
770 1.2.10.2 tls static void
771 1.2.10.2 tls nfsrc_wanted(struct nfsrvcache *rp)
772 1.2.10.2 tls {
773 1.2.10.2 tls if (rp->rc_flag & RC_WANTED) {
774 1.2.10.2 tls rp->rc_flag &= ~RC_WANTED;
775 1.2.10.2 tls wakeup((caddr_t)rp);
776 1.2.10.2 tls }
777 1.2.10.2 tls }
778 1.2.10.2 tls
779 1.2.10.2 tls /*
780 1.2.10.2 tls * Free up the entry.
781 1.2.10.2 tls * Must not sleep.
782 1.2.10.2 tls */
783 1.2.10.2 tls static void
784 1.2.10.2 tls nfsrc_freecache(struct nfsrvcache *rp)
785 1.2.10.2 tls {
786 1.2.10.3 jdolecek struct nfsrchash_bucket *hbp;
787 1.2.10.2 tls
788 1.2.10.2 tls LIST_REMOVE(rp, rc_hash);
789 1.2.10.2 tls if (rp->rc_flag & RC_UDP) {
790 1.2.10.2 tls TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
791 1.2.10.2 tls nfsrc_udpcachesize--;
792 1.2.10.3 jdolecek } else if (rp->rc_acked != RC_NO_SEQ) {
793 1.2.10.3 jdolecek hbp = NFSRCAHASH(rp->rc_sockref);
794 1.2.10.3 jdolecek mtx_lock(&hbp->mtx);
795 1.2.10.3 jdolecek if (rp->rc_acked == RC_NO_ACK)
796 1.2.10.3 jdolecek LIST_REMOVE(rp, rc_ahash);
797 1.2.10.3 jdolecek mtx_unlock(&hbp->mtx);
798 1.2.10.2 tls }
799 1.2.10.2 tls nfsrc_wanted(rp);
800 1.2.10.2 tls if (rp->rc_flag & RC_REPMBUF) {
801 1.2.10.2 tls mbuf_freem(rp->rc_reply);
802 1.2.10.2 tls if (!(rp->rc_flag & RC_UDP))
803 1.2.10.2 tls atomic_add_int(&nfsrc_tcpsavedreplies, -1);
804 1.2.10.2 tls }
805 1.2.10.2 tls FREE((caddr_t)rp, M_NFSRVCACHE);
806 1.2.10.3 jdolecek atomic_add_int(&nfsstatsv1.srvcache_size, -1);
807 1.2.10.2 tls }
808 1.2.10.2 tls
809 1.2.10.2 tls /*
810 1.2.10.2 tls * Clean out the cache. Called when nfsserver module is unloaded.
811 1.2.10.2 tls */
812 1.2.10.2 tls APPLESTATIC void
813 1.2.10.2 tls nfsrvd_cleancache(void)
814 1.2.10.2 tls {
815 1.2.10.2 tls struct nfsrvcache *rp, *nextrp;
816 1.2.10.2 tls int i;
817 1.2.10.2 tls
818 1.2.10.2 tls for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
819 1.2.10.2 tls mtx_lock(&nfsrchash_table[i].mtx);
820 1.2.10.2 tls LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, nextrp)
821 1.2.10.2 tls nfsrc_freecache(rp);
822 1.2.10.2 tls mtx_unlock(&nfsrchash_table[i].mtx);
823 1.2.10.2 tls }
824 1.2.10.2 tls mtx_lock(&nfsrc_udpmtx);
825 1.2.10.2 tls for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
826 1.2.10.2 tls LIST_FOREACH_SAFE(rp, &nfsrvudphashtbl[i], rc_hash, nextrp) {
827 1.2.10.2 tls nfsrc_freecache(rp);
828 1.2.10.2 tls }
829 1.2.10.2 tls }
830 1.2.10.3 jdolecek nfsstatsv1.srvcache_size = 0;
831 1.2.10.2 tls mtx_unlock(&nfsrc_udpmtx);
832 1.2.10.2 tls nfsrc_tcpsavedreplies = 0;
833 1.2.10.2 tls }
834 1.2.10.2 tls
835 1.2.10.3 jdolecek #define HISTSIZE 16
836 1.2.10.2 tls /*
837 1.2.10.2 tls * The basic rule is to get rid of entries that are expired.
838 1.2.10.2 tls */
839 1.2.10.3 jdolecek void
840 1.2.10.3 jdolecek nfsrc_trimcache(u_int64_t sockref, uint32_t snd_una, int final)
841 1.2.10.2 tls {
842 1.2.10.3 jdolecek struct nfsrchash_bucket *hbp;
843 1.2.10.2 tls struct nfsrvcache *rp, *nextrp;
844 1.2.10.3 jdolecek int force, lastslot, i, j, k, tto, time_histo[HISTSIZE];
845 1.2.10.2 tls time_t thisstamp;
846 1.2.10.2 tls static time_t udp_lasttrim = 0, tcp_lasttrim = 0;
847 1.2.10.3 jdolecek static int onethread = 0, oneslot = 0;
848 1.2.10.3 jdolecek
849 1.2.10.3 jdolecek if (sockref != 0) {
850 1.2.10.3 jdolecek hbp = NFSRCAHASH(sockref);
851 1.2.10.3 jdolecek mtx_lock(&hbp->mtx);
852 1.2.10.3 jdolecek LIST_FOREACH_SAFE(rp, &hbp->tbl, rc_ahash, nextrp) {
853 1.2.10.3 jdolecek if (sockref == rp->rc_sockref) {
854 1.2.10.3 jdolecek if (SEQ_GEQ(snd_una, rp->rc_tcpseq)) {
855 1.2.10.3 jdolecek rp->rc_acked = RC_ACK;
856 1.2.10.3 jdolecek LIST_REMOVE(rp, rc_ahash);
857 1.2.10.3 jdolecek } else if (final) {
858 1.2.10.3 jdolecek rp->rc_acked = RC_NACK;
859 1.2.10.3 jdolecek LIST_REMOVE(rp, rc_ahash);
860 1.2.10.3 jdolecek }
861 1.2.10.3 jdolecek }
862 1.2.10.3 jdolecek }
863 1.2.10.3 jdolecek mtx_unlock(&hbp->mtx);
864 1.2.10.3 jdolecek }
865 1.2.10.2 tls
866 1.2.10.2 tls if (atomic_cmpset_acq_int(&onethread, 0, 1) == 0)
867 1.2.10.2 tls return;
868 1.2.10.2 tls if (NFSD_MONOSEC != udp_lasttrim ||
869 1.2.10.2 tls nfsrc_udpcachesize >= (nfsrc_udphighwater +
870 1.2.10.2 tls nfsrc_udphighwater / 2)) {
871 1.2.10.2 tls mtx_lock(&nfsrc_udpmtx);
872 1.2.10.2 tls udp_lasttrim = NFSD_MONOSEC;
873 1.2.10.2 tls TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) {
874 1.2.10.2 tls if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
875 1.2.10.2 tls && rp->rc_refcnt == 0
876 1.2.10.2 tls && ((rp->rc_flag & RC_REFCNT) ||
877 1.2.10.2 tls udp_lasttrim > rp->rc_timestamp ||
878 1.2.10.2 tls nfsrc_udpcachesize > nfsrc_udphighwater))
879 1.2.10.2 tls nfsrc_freecache(rp);
880 1.2.10.2 tls }
881 1.2.10.2 tls mtx_unlock(&nfsrc_udpmtx);
882 1.2.10.2 tls }
883 1.2.10.2 tls if (NFSD_MONOSEC != tcp_lasttrim ||
884 1.2.10.2 tls nfsrc_tcpsavedreplies >= nfsrc_tcphighwater) {
885 1.2.10.3 jdolecek force = nfsrc_tcphighwater / 4;
886 1.2.10.3 jdolecek if (force > 0 &&
887 1.2.10.3 jdolecek nfsrc_tcpsavedreplies + force >= nfsrc_tcphighwater) {
888 1.2.10.3 jdolecek for (i = 0; i < HISTSIZE; i++)
889 1.2.10.3 jdolecek time_histo[i] = 0;
890 1.2.10.3 jdolecek i = 0;
891 1.2.10.3 jdolecek lastslot = NFSRVCACHE_HASHSIZE - 1;
892 1.2.10.3 jdolecek } else {
893 1.2.10.3 jdolecek force = 0;
894 1.2.10.3 jdolecek if (NFSD_MONOSEC != tcp_lasttrim) {
895 1.2.10.3 jdolecek i = 0;
896 1.2.10.3 jdolecek lastslot = NFSRVCACHE_HASHSIZE - 1;
897 1.2.10.3 jdolecek } else {
898 1.2.10.3 jdolecek lastslot = i = oneslot;
899 1.2.10.3 jdolecek if (++oneslot >= NFSRVCACHE_HASHSIZE)
900 1.2.10.3 jdolecek oneslot = 0;
901 1.2.10.3 jdolecek }
902 1.2.10.3 jdolecek }
903 1.2.10.3 jdolecek tto = nfsrc_tcptimeout;
904 1.2.10.3 jdolecek tcp_lasttrim = NFSD_MONOSEC;
905 1.2.10.3 jdolecek for (; i <= lastslot; i++) {
906 1.2.10.2 tls mtx_lock(&nfsrchash_table[i].mtx);
907 1.2.10.2 tls LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash,
908 1.2.10.2 tls nextrp) {
909 1.2.10.2 tls if (!(rp->rc_flag &
910 1.2.10.2 tls (RC_INPROG|RC_LOCKED|RC_WANTED))
911 1.2.10.2 tls && rp->rc_refcnt == 0) {
912 1.2.10.3 jdolecek if ((rp->rc_flag & RC_REFCNT) ||
913 1.2.10.3 jdolecek tcp_lasttrim > rp->rc_timestamp ||
914 1.2.10.3 jdolecek rp->rc_acked == RC_ACK) {
915 1.2.10.3 jdolecek nfsrc_freecache(rp);
916 1.2.10.3 jdolecek continue;
917 1.2.10.3 jdolecek }
918 1.2.10.3 jdolecek
919 1.2.10.3 jdolecek if (force == 0)
920 1.2.10.3 jdolecek continue;
921 1.2.10.2 tls /*
922 1.2.10.2 tls * The timestamps range from roughly the
923 1.2.10.2 tls * present (tcp_lasttrim) to the present
924 1.2.10.2 tls * + nfsrc_tcptimeout. Generate a simple
925 1.2.10.2 tls * histogram of where the timeouts fall.
926 1.2.10.2 tls */
927 1.2.10.2 tls j = rp->rc_timestamp - tcp_lasttrim;
928 1.2.10.3 jdolecek if (j >= tto)
929 1.2.10.3 jdolecek j = HISTSIZE - 1;
930 1.2.10.3 jdolecek else if (j < 0)
931 1.2.10.2 tls j = 0;
932 1.2.10.3 jdolecek else
933 1.2.10.3 jdolecek j = j * HISTSIZE / tto;
934 1.2.10.2 tls time_histo[j]++;
935 1.2.10.2 tls }
936 1.2.10.2 tls }
937 1.2.10.2 tls mtx_unlock(&nfsrchash_table[i].mtx);
938 1.2.10.2 tls }
939 1.2.10.3 jdolecek if (force) {
940 1.2.10.2 tls /*
941 1.2.10.2 tls * Trim some more with a smaller timeout of as little
942 1.2.10.2 tls * as 20% of nfsrc_tcptimeout to try and get below
943 1.2.10.2 tls * 80% of the nfsrc_tcphighwater.
944 1.2.10.2 tls */
945 1.2.10.2 tls k = 0;
946 1.2.10.3 jdolecek for (i = 0; i < (HISTSIZE - 2); i++) {
947 1.2.10.2 tls k += time_histo[i];
948 1.2.10.3 jdolecek if (k > force)
949 1.2.10.2 tls break;
950 1.2.10.2 tls }
951 1.2.10.3 jdolecek k = tto * (i + 1) / HISTSIZE;
952 1.2.10.2 tls if (k < 1)
953 1.2.10.2 tls k = 1;
954 1.2.10.2 tls thisstamp = tcp_lasttrim + k;
955 1.2.10.2 tls for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
956 1.2.10.2 tls mtx_lock(&nfsrchash_table[i].mtx);
957 1.2.10.2 tls LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl,
958 1.2.10.2 tls rc_hash, nextrp) {
959 1.2.10.2 tls if (!(rp->rc_flag &
960 1.2.10.2 tls (RC_INPROG|RC_LOCKED|RC_WANTED))
961 1.2.10.2 tls && rp->rc_refcnt == 0
962 1.2.10.2 tls && ((rp->rc_flag & RC_REFCNT) ||
963 1.2.10.2 tls thisstamp > rp->rc_timestamp ||
964 1.2.10.3 jdolecek rp->rc_acked == RC_ACK))
965 1.2.10.2 tls nfsrc_freecache(rp);
966 1.2.10.2 tls }
967 1.2.10.2 tls mtx_unlock(&nfsrchash_table[i].mtx);
968 1.2.10.2 tls }
969 1.2.10.2 tls }
970 1.2.10.2 tls }
971 1.2.10.2 tls atomic_store_rel_int(&onethread, 0);
972 1.2.10.2 tls }
973 1.2.10.2 tls
974 1.2.10.2 tls /*
975 1.2.10.2 tls * Add a seqid# reference to the cache entry.
976 1.2.10.2 tls */
977 1.2.10.2 tls APPLESTATIC void
978 1.2.10.2 tls nfsrvd_refcache(struct nfsrvcache *rp)
979 1.2.10.2 tls {
980 1.2.10.2 tls struct mtx *mutex;
981 1.2.10.2 tls
982 1.2.10.3 jdolecek if (rp == NULL)
983 1.2.10.3 jdolecek /* For NFSv4.1, there is no cache entry. */
984 1.2.10.3 jdolecek return;
985 1.2.10.2 tls mutex = nfsrc_cachemutex(rp);
986 1.2.10.2 tls mtx_lock(mutex);
987 1.2.10.2 tls if (rp->rc_refcnt < 0)
988 1.2.10.2 tls panic("nfs cache refcnt");
989 1.2.10.2 tls rp->rc_refcnt++;
990 1.2.10.2 tls mtx_unlock(mutex);
991 1.2.10.2 tls }
992 1.2.10.2 tls
993 1.2.10.2 tls /*
994 1.2.10.2 tls * Dereference a seqid# cache entry.
995 1.2.10.2 tls */
996 1.2.10.2 tls APPLESTATIC void
997 1.2.10.2 tls nfsrvd_derefcache(struct nfsrvcache *rp)
998 1.2.10.2 tls {
999 1.2.10.2 tls struct mtx *mutex;
1000 1.2.10.2 tls
1001 1.2.10.2 tls mutex = nfsrc_cachemutex(rp);
1002 1.2.10.2 tls mtx_lock(mutex);
1003 1.2.10.2 tls if (rp->rc_refcnt <= 0)
1004 1.2.10.2 tls panic("nfs cache derefcnt");
1005 1.2.10.2 tls rp->rc_refcnt--;
1006 1.2.10.2 tls if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG)))
1007 1.2.10.2 tls nfsrc_freecache(rp);
1008 1.2.10.2 tls mtx_unlock(mutex);
1009 1.2.10.2 tls }
1010 1.2.10.2 tls
1011 1.2.10.2 tls /*
1012 1.2.10.2 tls * Calculate the length of the mbuf list and a checksum on the first up to
1013 1.2.10.2 tls * NFSRVCACHE_CHECKLEN bytes.
1014 1.2.10.2 tls */
1015 1.2.10.2 tls static int
1016 1.2.10.2 tls nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum)
1017 1.2.10.2 tls {
1018 1.2.10.2 tls int len = 0, cklen;
1019 1.2.10.2 tls mbuf_t m;
1020 1.2.10.2 tls
1021 1.2.10.2 tls m = m1;
1022 1.2.10.2 tls while (m) {
1023 1.2.10.2 tls len += mbuf_len(m);
1024 1.2.10.2 tls m = mbuf_next(m);
1025 1.2.10.2 tls }
1026 1.2.10.2 tls cklen = (len > NFSRVCACHE_CHECKLEN) ? NFSRVCACHE_CHECKLEN : len;
1027 1.2.10.2 tls *cksum = in_cksum(m1, cklen);
1028 1.2.10.2 tls return (len);
1029 1.2.10.2 tls }
1030 1.2.10.2 tls
1031 1.2.10.2 tls /*
1032 1.2.10.2 tls * Mark a TCP connection that is seeing retries. Should never happen for
1033 1.2.10.2 tls * NFSv4.
1034 1.2.10.2 tls */
1035 1.2.10.2 tls static void
1036 1.2.10.2 tls nfsrc_marksametcpconn(u_int64_t sockref)
1037 1.2.10.2 tls {
1038 1.2.10.2 tls }
1039 1.2.10.2 tls
1040