/*	$NetBSD: nfs_nfsdcache.c,v 1.2.12.1 2016/12/05 10:55:26 skrll Exp $	*/
/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
/* __FBSDID("FreeBSD: head/sys/fs/nfsserver/nfs_nfsdcache.c 304026 2016-08-12 22:44:59Z rmacklem "); */
__RCSID("$NetBSD: nfs_nfsdcache.c,v 1.2.12.1 2016/12/05 10:55:26 skrll Exp $");

/*
 * Here is the basic algorithm:
 * First, some design criteria I used:
 *    - I think a false hit is more serious than a false miss
 *    - A false hit for an RPC that has Op(s) that order via seqid# must be
 *      avoided at all cost
 *    - A valid hit will probably happen a long time after the original reply
 *      and the TCP socket that the original request was received on will no
 *      longer be active
 *      (The long time delay implies to me that LRU is not appropriate.)
 *    - The mechanism will satisfy the requirements of ordering Ops with
 *      seqid#s in them as well as minimizing the risk of redoing retried
 *      non-idempotent Ops.
 * Because it is biased towards avoiding false hits, multiple entries with
 * the same xid are to be expected, especially for the case of the entry
 * in the cache being related to a seqid# sequenced Op.
 *
 * The basic algorithm I'm about to code up:
 * - Null RPCs bypass the cache and are just done
 * For TCP
 *	- key on <xid, NFS version> (as noted above, there can be several
 *	  entries with the same key)
 *	When a request arrives:
 *	    For all that match key
 *	    - if RPC# != OR request_size !=
 *		- not a match with this one
 *	    - if NFSv4 and received on same TCP socket OR
 *	      received on a TCP connection created before the
 *	      entry was cached
 *		- not a match with this one
 *		  (V2,3 clients might retry on same TCP socket)
 *	    - calculate checksum on first N bytes of NFS XDR
 *	    - if checksum !=
 *		- not a match for this one
 *	    If any of the remaining ones that match has a
 *	      seqid_refcnt > 0
 *		- not a match (go do RPC, using new cache entry)
 *	    If one match left
 *		- a hit (reply from cache)
 *	    else
 *		- miss (go do RPC, using new cache entry)
 *
 *	During processing of NFSv4 request:
 *	    - set a flag when a non-idempotent Op is processed
 *	    - when an Op that uses a seqid# (Open,...) is processed
 *		- if same seqid# as referenced entry in cache
 *		    - free new cache entry
 *		    - reply from referenced cache entry
 *		  else if next seqid# in order
 *		    - free referenced cache entry
 *		    - increment seqid_refcnt on new cache entry
 *		    - set pointer from Openowner/Lockowner to
 *		      new cache entry (aka reference it)
 *		  else if first seqid# in sequence
 *		    - increment seqid_refcnt on new cache entry
 *		    - set pointer from Openowner/Lockowner to
 *		      new cache entry (aka reference it)
 *
 *	At end of RPC processing:
 *	    - if seqid_refcnt > 0 OR flagged non-idempotent on new
 *	      cache entry
 *		- save reply in cache entry
 *		- calculate checksum on first N bytes of NFS XDR
 *		  request
 *		- note op and length of XDR request (in bytes)
 *		- timestamp it
 *	      else
 *		- free new cache entry
 *	    - Send reply (noting info for socket activity check, below)
 *
 *	For cache entries saved above:
 *	    - if saved since seqid_refcnt was > 0
 *		- free when seqid_refcnt decrements to 0
 *		  (when next one in sequence is processed above, or
 *		   when Openowner/Lockowner is discarded)
 *	      else { non-idempotent Op(s) }
 *		- free when
 *		    - some further activity observed on same
 *		      socket
 *		      (I'm not yet sure how I'm going to do
 *		       this. Maybe look at the TCP connection
 *		       to see if the send_tcp_sequence# is well
 *		       past sent reply OR K additional RPCs
 *		       replied on same socket OR?)
 *		  OR
 *		    - when very old (hours, days, weeks?)
 *
 * For UDP (v2, 3 only), pretty much the old way:
 * - key on <xid, NFS version, RPC#, Client host ip#>
 *   (at most one entry for each key)
 *
 * When a Request arrives:
 * - if a match with entry via key
 *	- if RPC marked In_progress
 *	    - discard request (don't send reply)
 *	  else
 *	    - reply from cache
 *	    - timestamp cache entry
 *   else
 *	- add entry to cache, marked In_progress
 *	- do RPC
 *	- when RPC done
 *	    - if RPC# non-idempotent
 *		- mark entry Done (not In_progress)
 *		- save reply
 *		- timestamp cache entry
 *	      else
 *		- free cache entry
 *	    - send reply
 *
 * Later, entries with saved replies are freed a short time (few minutes)
 * after the reply is sent (timestamp).
 * Reference: Chet Juszczak, "Improving the Performance and Correctness
 *	of an NFS Server", in Proc. Winter 1989 USENIX Conference,
 *	pages 53-63. San Diego, February 1989, for the UDP case.
 * nfsrc_floodlevel is set to the allowable upper limit for saved replies
 *	for TCP. For V3, a reply won't be saved when the flood level is
 *	hit. For V4, the non-idempotent Op will return NFSERR_RESOURCE in
 *	that case. This level should be set high enough that this almost
 *	never happens.
 */
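
/*
 * Illustrative sketch (not compiled in): the per-candidate TCP match
 * test described above, written out as a single predicate over two
 * cache entries.  Field names follow struct nfsrvcache as used in the
 * code below; the helper function name itself is hypothetical.
 */
#if 0
static int
nfsrc_tcpmatch_sketch(struct nfsrvcache *newrp, struct nfsrvcache *rp)
{

	if (newrp->rc_xid != rp->rc_xid ||		/* xid key */
	    newrp->rc_proc != rp->rc_proc ||		/* RPC# differs */
	    newrp->rc_reqlen != rp->rc_reqlen ||	/* request size differs */
	    (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) == 0)
		return (0);
	if ((newrp->rc_flag & RC_NFSV4) != 0 &&
	    (newrp->rc_sockref == rp->rc_sockref ||	/* same TCP socket */
	     newrp->rc_cachetime < rp->rc_cachetime))	/* older connection */
		return (0);
	/* Finally, the checksum over the first N bytes of NFS XDR. */
	return (newrp->rc_cksum == rp->rc_cksum);
}
#endif
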
#ifndef APPLEKEXT
#include <fs/nfs/nfsport.h>

extern struct nfsstatsv1 nfsstatsv1;
extern struct mtx nfsrc_udpmtx;
extern struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
extern struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE];
int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0;
#endif	/* !APPLEKEXT */

SYSCTL_DECL(_vfs_nfsd);

static u_int	nfsrc_tcphighwater = 0;
static int
sysctl_tcphighwater(SYSCTL_HANDLER_ARGS)
{
	int error, newhighwater;

	newhighwater = nfsrc_tcphighwater;
	error = sysctl_handle_int(oidp, &newhighwater, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (newhighwater < 0)
		return (EINVAL);
	if (newhighwater >= nfsrc_floodlevel)
		nfsrc_floodlevel = newhighwater + newhighwater / 5;
	nfsrc_tcphighwater = newhighwater;
	return (0);
}
SYSCTL_PROC(_vfs_nfsd, OID_AUTO, tcphighwater, CTLTYPE_UINT | CTLFLAG_RW, 0,
    sizeof(nfsrc_tcphighwater), sysctl_tcphighwater, "IU",
    "High water mark for TCP cache entries");
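
/*
 * Worked example of the flood level arithmetic above (an illustration,
 * not output from a real run): setting the mark with
 * "sysctl vfs.nfsd.tcphighwater=10000" also raises nfsrc_floodlevel to
 * 10000 + 10000 / 5 = 12000 whenever the new mark is at or above the
 * current flood level, keeping the flood level 20% above the high
 * water mark.
 */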

static u_int	nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER;
SYSCTL_UINT(_vfs_nfsd, OID_AUTO, udphighwater, CTLFLAG_RW,
    &nfsrc_udphighwater, 0,
    "High water mark for UDP cache entries");
static u_int	nfsrc_tcptimeout = NFSRVCACHE_TCPTIMEOUT;
SYSCTL_UINT(_vfs_nfsd, OID_AUTO, tcpcachetimeo, CTLFLAG_RW,
    &nfsrc_tcptimeout, 0,
    "Timeout for TCP entries in the DRC");
static u_int nfsrc_tcpnonidempotent = 1;
SYSCTL_UINT(_vfs_nfsd, OID_AUTO, cachetcp, CTLFLAG_RW,
    &nfsrc_tcpnonidempotent, 0,
    "Enable the DRC for NFS over TCP");

static int nfsrc_udpcachesize = 0;
static TAILQ_HEAD(, nfsrvcache) nfsrvudplru;
static struct nfsrvhashhead nfsrvudphashtbl[NFSRVCACHE_HASHSIZE];

/*
 * The reverse mapping from generic to Version 2 procedure numbers.
 */
static int newnfsv2_procid[NFS_V3NPROCS] = {
	NFSV2PROC_NULL,
	NFSV2PROC_GETATTR,
	NFSV2PROC_SETATTR,
	NFSV2PROC_LOOKUP,
	NFSV2PROC_NOOP,
	NFSV2PROC_READLINK,
	NFSV2PROC_READ,
	NFSV2PROC_WRITE,
	NFSV2PROC_CREATE,
	NFSV2PROC_MKDIR,
	NFSV2PROC_SYMLINK,
	NFSV2PROC_CREATE,
	NFSV2PROC_REMOVE,
	NFSV2PROC_RMDIR,
	NFSV2PROC_RENAME,
	NFSV2PROC_LINK,
	NFSV2PROC_READDIR,
	NFSV2PROC_NOOP,
	NFSV2PROC_STATFS,
	NFSV2PROC_NOOP,
	NFSV2PROC_NOOP,
	NFSV2PROC_NOOP,
};

#define	nfsrc_hash(xid)	(((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE)
#define	NFSRCUDPHASH(xid) \
	(&nfsrvudphashtbl[nfsrc_hash(xid)])
#define	NFSRCHASH(xid) \
	(&nfsrchash_table[nfsrc_hash(xid)].tbl)
#define	NFSRCAHASH(xid)	(&nfsrcahash_table[nfsrc_hash(xid)])
#define	TRUE	1
#define	FALSE	0
#define	NFSRVCACHE_CHECKLEN	100
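
/*
 * Worked example of the bucket hash above: for xid 0x12345678,
 * nfsrc_hash() computes (0x12345678 + 0x12) % NFSRVCACHE_HASHSIZE,
 * since 0x12345678 >> 24 == 0x12.  Folding the high-order byte back
 * in helps spread xids that differ mostly in their upper bits across
 * different buckets.
 */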

/*
 * True iff the RPC reply is an NFS status ONLY!  (Indexed by NFSv2
 * procedure number; see the ND_NFSV2 case in nfsrvd_updatecache().)
 */
static int nfsv2_repstat[NFS_V3NPROCS] = {
	FALSE,			/* NULL */
	FALSE,			/* GETATTR */
	FALSE,			/* SETATTR */
	FALSE,			/* ROOT */
	FALSE,			/* LOOKUP */
	FALSE,			/* READLINK */
	FALSE,			/* READ */
	FALSE,			/* WRITECACHE */
	FALSE,			/* WRITE */
	FALSE,			/* CREATE */
	TRUE,			/* REMOVE */
	TRUE,			/* RENAME */
	TRUE,			/* LINK */
	TRUE,			/* SYMLINK */
	FALSE,			/* MKDIR */
	TRUE,			/* RMDIR */
	FALSE,			/* READDIR */
	FALSE,			/* STATFS */
	FALSE,
	FALSE,
	FALSE,
	FALSE,
};

/*
 * Will NFS want to work over IPv6 someday?
 */
#define	NETFAMILY(rp) \
	(((rp)->rc_flag & RC_INETIPV6) ? AF_INET6 : AF_INET)

/* local functions */
static int nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
static int nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
static void nfsrc_lock(struct nfsrvcache *rp);
static void nfsrc_unlock(struct nfsrvcache *rp);
static void nfsrc_wanted(struct nfsrvcache *rp);
static void nfsrc_freecache(struct nfsrvcache *rp);
static int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum);
static void nfsrc_marksametcpconn(u_int64_t);

/*
 * Return the correct mutex for this cache entry.
 */
static __inline struct mtx *
nfsrc_cachemutex(struct nfsrvcache *rp)
{

	if ((rp->rc_flag & RC_UDP) != 0)
		return (&nfsrc_udpmtx);
	return (&nfsrchash_table[nfsrc_hash(rp->rc_xid)].mtx);
}
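
/*
 * Design note: each TCP hash bucket carries its own mutex
 * (nfsrchash_table[].mtx), so different buckets can be locked
 * independently, while all UDP entries share the single global
 * nfsrc_udpmtx.
 */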

/*
 * Initialize the server request cache list
 */
APPLESTATIC void
nfsrvd_initcache(void)
{
	int i;
	static int inited = 0;

	if (inited)
		return;
	inited = 1;
	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
		LIST_INIT(&nfsrvudphashtbl[i]);
		LIST_INIT(&nfsrchash_table[i].tbl);
		LIST_INIT(&nfsrcahash_table[i].tbl);
	}
	TAILQ_INIT(&nfsrvudplru);
	nfsrc_tcpsavedreplies = 0;
	nfsrc_udpcachesize = 0;
	nfsstatsv1.srvcache_tcppeak = 0;
	nfsstatsv1.srvcache_size = 0;
}

/*
 * Get a cache entry for this request.  Basically just malloc a new one
 * and then call nfsrc_getudp() or nfsrc_gettcp() to do the rest.
 */
APPLESTATIC int
nfsrvd_getcache(struct nfsrv_descript *nd)
{
	struct nfsrvcache *newrp;
	int ret;

	if (nd->nd_procnum == NFSPROC_NULL)
		panic("nfsd cache null");
	MALLOC(newrp, struct nfsrvcache *, sizeof (struct nfsrvcache),
	    M_NFSRVCACHE, M_WAITOK);
	NFSBZERO((caddr_t)newrp, sizeof (struct nfsrvcache));
	if (nd->nd_flag & ND_NFSV4)
		newrp->rc_flag = RC_NFSV4;
	else if (nd->nd_flag & ND_NFSV3)
		newrp->rc_flag = RC_NFSV3;
	else
		newrp->rc_flag = RC_NFSV2;
	newrp->rc_xid = nd->nd_retxid;
	newrp->rc_proc = nd->nd_procnum;
	newrp->rc_sockref = nd->nd_sockref;
	newrp->rc_cachetime = nd->nd_tcpconntime;
	if (nd->nd_flag & ND_SAMETCPCONN)
		newrp->rc_flag |= RC_SAMETCPCONN;
	if (nd->nd_nam2 != NULL) {
		newrp->rc_flag |= RC_UDP;
		ret = nfsrc_getudp(nd, newrp);
	} else {
		ret = nfsrc_gettcp(nd, newrp);
	}
	NFSEXITCODE2(0, nd);
	return (ret);
}
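
/*
 * Hedged sketch (not compiled in) of how a server request loop is
 * expected to drive the cache; the real callers live elsewhere in the
 * NFS server, and send_reply()/do_the_rpc() here are hypothetical
 * stand-ins, as are the local variables rp, have_seq and seq.
 */
#if 0
	switch (nfsrvd_getcache(nd)) {
	case RC_DROPIT:
		/* Duplicate whose original is still in progress: no reply. */
		break;
	case RC_REPLY:
		/* Duplicate already done: nd->nd_mreq holds the cached reply. */
		send_reply(nd);
		break;
	case RC_DOIT:
		/* New request: execute it, cache the reply, then send it. */
		do_the_rpc(nd);
		rp = nfsrvd_updatecache(nd);
		send_reply(nd);
		if (rp != NULL)
			nfsrvd_sentcache(rp, have_seq, seq);
		break;
	}
#endif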

/*
 * For UDP (v2, v3):
 * - key on <xid, NFS version, RPC#, Client host ip#>
 *   (at most one entry for each key)
 */
static int
nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
{
	struct nfsrvcache *rp;
	struct sockaddr_in *saddr;
	struct sockaddr_in6 *saddr6;
	struct nfsrvhashhead *hp;
	int ret = 0;
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(newrp);
	hp = NFSRCUDPHASH(newrp->rc_xid);
loop:
	mtx_lock(mutex);
	LIST_FOREACH(rp, hp, rc_hash) {
		if (newrp->rc_xid == rp->rc_xid &&
		    newrp->rc_proc == rp->rc_proc &&
		    (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
		    nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
			if ((rp->rc_flag & RC_LOCKED) != 0) {
				rp->rc_flag |= RC_WANTED;
				(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
				    "nfsrc", 10 * hz);
				goto loop;
			}
			if (rp->rc_flag == 0)
				panic("nfs udp cache0");
			rp->rc_flag |= RC_LOCKED;
			TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
			TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
			if (rp->rc_flag & RC_INPROG) {
				nfsstatsv1.srvcache_inproghits++;
				mtx_unlock(mutex);
				ret = RC_DROPIT;
			} else if (rp->rc_flag & RC_REPSTATUS) {
				/*
				 * V2 only.
				 */
				nfsstatsv1.srvcache_nonidemdonehits++;
				mtx_unlock(mutex);
				nfsrvd_rephead(nd);
				*(nd->nd_errp) = rp->rc_status;
				ret = RC_REPLY;
				rp->rc_timestamp = NFSD_MONOSEC +
				    NFSRVCACHE_UDPTIMEOUT;
			} else if (rp->rc_flag & RC_REPMBUF) {
				nfsstatsv1.srvcache_nonidemdonehits++;
				mtx_unlock(mutex);
				nd->nd_mreq = m_copym(rp->rc_reply, 0,
				    M_COPYALL, M_WAITOK);
				ret = RC_REPLY;
				rp->rc_timestamp = NFSD_MONOSEC +
				    NFSRVCACHE_UDPTIMEOUT;
			} else {
				panic("nfs udp cache1");
			}
			nfsrc_unlock(rp);
			free((caddr_t)newrp, M_NFSRVCACHE);
			goto out;
		}
	}
	nfsstatsv1.srvcache_misses++;
	atomic_add_int(&nfsstatsv1.srvcache_size, 1);
	nfsrc_udpcachesize++;

	newrp->rc_flag |= RC_INPROG;
	saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
	if (saddr->sin_family == AF_INET)
		newrp->rc_inet = saddr->sin_addr.s_addr;
	else if (saddr->sin_family == AF_INET6) {
		saddr6 = (struct sockaddr_in6 *)saddr;
		NFSBCOPY((caddr_t)&saddr6->sin6_addr, (caddr_t)&newrp->rc_inet6,
		    sizeof (struct in6_addr));
		newrp->rc_flag |= RC_INETIPV6;
	}
	LIST_INSERT_HEAD(hp, newrp, rc_hash);
	TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru);
	mtx_unlock(mutex);
	nd->nd_rp = newrp;
	ret = RC_DOIT;

out:
	NFSEXITCODE2(0, nd);
	return (ret);
}

/*
 * Update a request cache entry after the rpc has been done
 */
APPLESTATIC struct nfsrvcache *
nfsrvd_updatecache(struct nfsrv_descript *nd)
{
	struct nfsrvcache *rp;
	struct nfsrvcache *retrp = NULL;
	mbuf_t m;
	struct mtx *mutex;

	rp = nd->nd_rp;
	if (!rp)
		panic("nfsrvd_updatecache null rp");
	nd->nd_rp = NULL;
	mutex = nfsrc_cachemutex(rp);
	mtx_lock(mutex);
	nfsrc_lock(rp);
	if (!(rp->rc_flag & RC_INPROG))
		panic("nfsrvd_updatecache not inprog");
	rp->rc_flag &= ~RC_INPROG;
	if (rp->rc_flag & RC_UDP) {
		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
		TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
	}

	/*
	 * Reply from cache is a special case returned by nfsrv_checkseqid().
	 */
	if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) {
		nfsstatsv1.srvcache_nonidemdonehits++;
		mtx_unlock(mutex);
		nd->nd_repstat = 0;
		if (nd->nd_mreq)
			mbuf_freem(nd->nd_mreq);
		if (!(rp->rc_flag & RC_REPMBUF))
			panic("reply from cache");
		nd->nd_mreq = m_copym(rp->rc_reply, 0,
		    M_COPYALL, M_WAITOK);
		rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
		nfsrc_unlock(rp);
		goto out;
	}

	/*
	 * If rc_refcnt > 0, save it
	 * For UDP, save it if ND_SAVEREPLY is set
	 * For TCP, save it if ND_SAVEREPLY and nfsrc_tcpnonidempotent is set
	 */
	if (nd->nd_repstat != NFSERR_DONTREPLY &&
	    (rp->rc_refcnt > 0 ||
	     ((nd->nd_flag & ND_SAVEREPLY) && (rp->rc_flag & RC_UDP)) ||
	     ((nd->nd_flag & ND_SAVEREPLY) && !(rp->rc_flag & RC_UDP) &&
	      nfsrc_tcpsavedreplies <= nfsrc_floodlevel &&
	      nfsrc_tcpnonidempotent))) {
		if (rp->rc_refcnt > 0) {
			if (!(rp->rc_flag & RC_NFSV4))
				panic("update_cache refcnt");
			rp->rc_flag |= RC_REFCNT;
		}
		if ((nd->nd_flag & ND_NFSV2) &&
		    nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) {
			rp->rc_status = nd->nd_repstat;
			rp->rc_flag |= RC_REPSTATUS;
			mtx_unlock(mutex);
		} else {
			if (!(rp->rc_flag & RC_UDP)) {
				atomic_add_int(&nfsrc_tcpsavedreplies, 1);
				if (nfsrc_tcpsavedreplies >
				    nfsstatsv1.srvcache_tcppeak)
					nfsstatsv1.srvcache_tcppeak =
					    nfsrc_tcpsavedreplies;
			}
			mtx_unlock(mutex);
			m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
			mtx_lock(mutex);
			rp->rc_reply = m;
			rp->rc_flag |= RC_REPMBUF;
			mtx_unlock(mutex);
		}
		if (rp->rc_flag & RC_UDP) {
			rp->rc_timestamp = NFSD_MONOSEC +
			    NFSRVCACHE_UDPTIMEOUT;
			nfsrc_unlock(rp);
		} else {
			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
			if (rp->rc_refcnt > 0)
				nfsrc_unlock(rp);
			else
				retrp = rp;
		}
	} else {
		nfsrc_freecache(rp);
		mtx_unlock(mutex);
	}

out:
	NFSEXITCODE2(0, nd);
	return (retrp);
}
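
/*
 * Note on the return value above: a non-NULL pointer is returned only
 * for a TCP entry whose reply was saved and that holds no seqid#
 * reference.  Such an entry is returned still locked, and the caller
 * is expected to hand it to nfsrvd_sentcache() once the reply has
 * actually been sent.
 */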

/*
 * Invalidate and, if possible, free an in-progress cache entry.
 * Must not sleep.
 */
APPLESTATIC void
nfsrvd_delcache(struct nfsrvcache *rp)
{
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(rp);
	if (!(rp->rc_flag & RC_INPROG))
		panic("nfsrvd_delcache not in prog");
	mtx_lock(mutex);
	rp->rc_flag &= ~RC_INPROG;
	if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED))
		nfsrc_freecache(rp);
	mtx_unlock(mutex);
}

/*
 * Called after nfsrvd_updatecache() once the reply is sent, to update
 * the entry's sequence number and unlock it.  The argument is
 * the pointer returned by nfsrvd_updatecache().
 */
APPLESTATIC void
nfsrvd_sentcache(struct nfsrvcache *rp, int have_seq, uint32_t seq)
{
	struct nfsrchash_bucket *hbp;

	KASSERT(rp->rc_flag & RC_LOCKED, ("nfsrvd_sentcache not locked"));
	if (have_seq) {
		hbp = NFSRCAHASH(rp->rc_sockref);
		mtx_lock(&hbp->mtx);
		rp->rc_tcpseq = seq;
		if (rp->rc_acked != RC_NO_ACK)
			LIST_INSERT_HEAD(&hbp->tbl, rp, rc_ahash);
		rp->rc_acked = RC_NO_ACK;
		mtx_unlock(&hbp->mtx);
	}
	nfsrc_unlock(rp);
}
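
/*
 * For example: if the reply just sent ends at TCP send sequence number
 * S, the entry is queued above with rc_tcpseq = S.  When
 * nfsrc_trimcache() later runs for the same sockref with snd_una >= S,
 * the client has ACKed the entire reply, so the entry is marked RC_ACK
 * and becomes eligible to be freed.
 */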

/*
 * Get a cache entry for TCP
 * - key on <xid, nfs version>
 *   (allow multiple entries for a given key)
 */
static int
nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
{
	struct nfsrvcache *rp, *nextrp;
	int i;
	struct nfsrvcache *hitrp;
	struct nfsrvhashhead *hp, nfsrc_templist;
	int hit, ret = 0;
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(newrp);
	hp = NFSRCHASH(newrp->rc_xid);
	newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum);
tryagain:
	mtx_lock(mutex);
	hit = 1;
	LIST_INIT(&nfsrc_templist);
	/*
	 * Get all the matches and put them on the temp list.
	 */
	rp = LIST_FIRST(hp);
	while (rp != NULL) {
		nextrp = LIST_NEXT(rp, rc_hash);
		if (newrp->rc_xid == rp->rc_xid &&
		    (!(rp->rc_flag & RC_INPROG) ||
		     ((newrp->rc_flag & RC_SAMETCPCONN) &&
		      newrp->rc_sockref == rp->rc_sockref)) &&
		    (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
		    newrp->rc_proc == rp->rc_proc &&
		    ((newrp->rc_flag & RC_NFSV4) &&
		     newrp->rc_sockref != rp->rc_sockref &&
		     newrp->rc_cachetime >= rp->rc_cachetime)
		    && newrp->rc_reqlen == rp->rc_reqlen &&
		    newrp->rc_cksum == rp->rc_cksum) {
			LIST_REMOVE(rp, rc_hash);
			LIST_INSERT_HEAD(&nfsrc_templist, rp, rc_hash);
		}
		rp = nextrp;
	}

	/*
	 * Now, use nfsrc_templist to decide if there is a match.
	 */
	i = 0;
	LIST_FOREACH(rp, &nfsrc_templist, rc_hash) {
		i++;
		if (rp->rc_refcnt > 0) {
			hit = 0;
			break;
		}
	}
	/*
	 * Can be a hit only if one entry left.
	 * Note possible hit entry and put nfsrc_templist back on hash
	 * list.
	 */
	if (i != 1)
		hit = 0;
	hitrp = rp = LIST_FIRST(&nfsrc_templist);
	while (rp != NULL) {
		nextrp = LIST_NEXT(rp, rc_hash);
		LIST_REMOVE(rp, rc_hash);
		LIST_INSERT_HEAD(hp, rp, rc_hash);
		rp = nextrp;
	}
	if (LIST_FIRST(&nfsrc_templist) != NULL)
		panic("nfs gettcp cache templist");

	if (hit) {
		rp = hitrp;
		if ((rp->rc_flag & RC_LOCKED) != 0) {
			rp->rc_flag |= RC_WANTED;
			(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
			    "nfsrc", 10 * hz);
			goto tryagain;
		}
		if (rp->rc_flag == 0)
			panic("nfs tcp cache0");
		rp->rc_flag |= RC_LOCKED;
		if (rp->rc_flag & RC_INPROG) {
			nfsstatsv1.srvcache_inproghits++;
			mtx_unlock(mutex);
			if (newrp->rc_sockref == rp->rc_sockref)
				nfsrc_marksametcpconn(rp->rc_sockref);
			ret = RC_DROPIT;
		} else if (rp->rc_flag & RC_REPSTATUS) {
			/*
			 * V2 only.
			 */
			nfsstatsv1.srvcache_nonidemdonehits++;
			mtx_unlock(mutex);
			if (newrp->rc_sockref == rp->rc_sockref)
				nfsrc_marksametcpconn(rp->rc_sockref);
			ret = RC_REPLY;
			nfsrvd_rephead(nd);
			*(nd->nd_errp) = rp->rc_status;
			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
		} else if (rp->rc_flag & RC_REPMBUF) {
			nfsstatsv1.srvcache_nonidemdonehits++;
			mtx_unlock(mutex);
			if (newrp->rc_sockref == rp->rc_sockref)
				nfsrc_marksametcpconn(rp->rc_sockref);
			ret = RC_REPLY;
			nd->nd_mreq = m_copym(rp->rc_reply, 0,
			    M_COPYALL, M_WAITOK);
			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
		} else {
			panic("nfs tcp cache1");
		}
		nfsrc_unlock(rp);
		free((caddr_t)newrp, M_NFSRVCACHE);
		goto out;
	}
	nfsstatsv1.srvcache_misses++;
	atomic_add_int(&nfsstatsv1.srvcache_size, 1);

	/*
	 * For TCP, multiple entries for a key are allowed, so don't
	 * chain it into the hash table until done.
	 */
	newrp->rc_cachetime = NFSD_MONOSEC;
	newrp->rc_flag |= RC_INPROG;
	LIST_INSERT_HEAD(hp, newrp, rc_hash);
	mtx_unlock(mutex);
	nd->nd_rp = newrp;
	ret = RC_DOIT;

out:
	NFSEXITCODE2(0, nd);
	return (ret);
}

/*
 * Lock a cache entry.
 */
static void
nfsrc_lock(struct nfsrvcache *rp)
{
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(rp);
	mtx_assert(mutex, MA_OWNED);
	while ((rp->rc_flag & RC_LOCKED) != 0) {
		rp->rc_flag |= RC_WANTED;
		(void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0);
	}
	rp->rc_flag |= RC_LOCKED;
}

/*
 * Unlock a cache entry.
 */
static void
nfsrc_unlock(struct nfsrvcache *rp)
{
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(rp);
	mtx_lock(mutex);
	rp->rc_flag &= ~RC_LOCKED;
	nfsrc_wanted(rp);
	mtx_unlock(mutex);
}

/*
 * Wakeup anyone wanting entry.
 */
static void
nfsrc_wanted(struct nfsrvcache *rp)
{
	if (rp->rc_flag & RC_WANTED) {
		rp->rc_flag &= ~RC_WANTED;
		wakeup((caddr_t)rp);
	}
}

/*
 * Free up the entry.
 * Must not sleep.
 */
static void
nfsrc_freecache(struct nfsrvcache *rp)
{
	struct nfsrchash_bucket *hbp;

	LIST_REMOVE(rp, rc_hash);
	if (rp->rc_flag & RC_UDP) {
		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
		nfsrc_udpcachesize--;
	} else if (rp->rc_acked != RC_NO_SEQ) {
		hbp = NFSRCAHASH(rp->rc_sockref);
		mtx_lock(&hbp->mtx);
		if (rp->rc_acked == RC_NO_ACK)
			LIST_REMOVE(rp, rc_ahash);
		mtx_unlock(&hbp->mtx);
	}
	nfsrc_wanted(rp);
	if (rp->rc_flag & RC_REPMBUF) {
		mbuf_freem(rp->rc_reply);
		if (!(rp->rc_flag & RC_UDP))
			atomic_add_int(&nfsrc_tcpsavedreplies, -1);
	}
	FREE((caddr_t)rp, M_NFSRVCACHE);
	atomic_add_int(&nfsstatsv1.srvcache_size, -1);
}

/*
 * Clean out the cache.  Called when nfsserver module is unloaded.
 */
APPLESTATIC void
nfsrvd_cleancache(void)
{
	struct nfsrvcache *rp, *nextrp;
	int i;

	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
		mtx_lock(&nfsrchash_table[i].mtx);
		LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, nextrp)
			nfsrc_freecache(rp);
		mtx_unlock(&nfsrchash_table[i].mtx);
	}
	mtx_lock(&nfsrc_udpmtx);
	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
		LIST_FOREACH_SAFE(rp, &nfsrvudphashtbl[i], rc_hash, nextrp) {
			nfsrc_freecache(rp);
		}
	}
	nfsstatsv1.srvcache_size = 0;
	mtx_unlock(&nfsrc_udpmtx);
	nfsrc_tcpsavedreplies = 0;
}

#define	HISTSIZE	16
/*
 * The basic rule is to get rid of entries that are expired.
 */
void
nfsrc_trimcache(u_int64_t sockref, uint32_t snd_una, int final)
{
	struct nfsrchash_bucket *hbp;
	struct nfsrvcache *rp, *nextrp;
	int force, lastslot, i, j, k, tto, time_histo[HISTSIZE];
	time_t thisstamp;
	static time_t udp_lasttrim = 0, tcp_lasttrim = 0;
	static int onethread = 0, oneslot = 0;

	if (sockref != 0) {
		hbp = NFSRCAHASH(sockref);
		mtx_lock(&hbp->mtx);
		LIST_FOREACH_SAFE(rp, &hbp->tbl, rc_ahash, nextrp) {
			if (sockref == rp->rc_sockref) {
				if (SEQ_GEQ(snd_una, rp->rc_tcpseq)) {
					rp->rc_acked = RC_ACK;
					LIST_REMOVE(rp, rc_ahash);
				} else if (final) {
					rp->rc_acked = RC_NACK;
					LIST_REMOVE(rp, rc_ahash);
				}
			}
		}
		mtx_unlock(&hbp->mtx);
	}

	if (atomic_cmpset_acq_int(&onethread, 0, 1) == 0)
		return;
	if (NFSD_MONOSEC != udp_lasttrim ||
	    nfsrc_udpcachesize >= (nfsrc_udphighwater +
	    nfsrc_udphighwater / 2)) {
		mtx_lock(&nfsrc_udpmtx);
		udp_lasttrim = NFSD_MONOSEC;
		TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) {
			if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
			     && rp->rc_refcnt == 0
			     && ((rp->rc_flag & RC_REFCNT) ||
				 udp_lasttrim > rp->rc_timestamp ||
				 nfsrc_udpcachesize > nfsrc_udphighwater))
				nfsrc_freecache(rp);
		}
		mtx_unlock(&nfsrc_udpmtx);
	}
	if (NFSD_MONOSEC != tcp_lasttrim ||
	    nfsrc_tcpsavedreplies >= nfsrc_tcphighwater) {
		force = nfsrc_tcphighwater / 4;
		if (force > 0 &&
		    nfsrc_tcpsavedreplies + force >= nfsrc_tcphighwater) {
			for (i = 0; i < HISTSIZE; i++)
				time_histo[i] = 0;
			i = 0;
			lastslot = NFSRVCACHE_HASHSIZE - 1;
		} else {
			force = 0;
			if (NFSD_MONOSEC != tcp_lasttrim) {
				i = 0;
				lastslot = NFSRVCACHE_HASHSIZE - 1;
			} else {
				lastslot = i = oneslot;
				if (++oneslot >= NFSRVCACHE_HASHSIZE)
					oneslot = 0;
			}
		}
		tto = nfsrc_tcptimeout;
		tcp_lasttrim = NFSD_MONOSEC;
		for (; i <= lastslot; i++) {
			mtx_lock(&nfsrchash_table[i].mtx);
			LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash,
			    nextrp) {
				if (!(rp->rc_flag &
				     (RC_INPROG|RC_LOCKED|RC_WANTED))
				     && rp->rc_refcnt == 0) {
					if ((rp->rc_flag & RC_REFCNT) ||
					    tcp_lasttrim > rp->rc_timestamp ||
					    rp->rc_acked == RC_ACK) {
						nfsrc_freecache(rp);
						continue;
					}

					if (force == 0)
						continue;
					/*
					 * The timestamps range from roughly
					 * the present (tcp_lasttrim) to the
					 * present + nfsrc_tcptimeout.
					 * Generate a simple histogram of
					 * where the timeouts fall.
					 */
					j = rp->rc_timestamp - tcp_lasttrim;
					if (j >= tto)
						j = HISTSIZE - 1;
					else if (j < 0)
						j = 0;
					else
						j = j * HISTSIZE / tto;
					time_histo[j]++;
				}
			}
			mtx_unlock(&nfsrchash_table[i].mtx);
		}
		if (force) {
			/*
			 * Trim some more with a smaller timeout of as little
			 * as 20% of nfsrc_tcptimeout to try and get below
			 * 80% of the nfsrc_tcphighwater.
			 */
			k = 0;
			for (i = 0; i < (HISTSIZE - 2); i++) {
				k += time_histo[i];
				if (k > force)
					break;
			}
			k = tto * (i + 1) / HISTSIZE;
			if (k < 1)
				k = 1;
			thisstamp = tcp_lasttrim + k;
			for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
				mtx_lock(&nfsrchash_table[i].mtx);
				LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl,
				    rc_hash, nextrp) {
					if (!(rp->rc_flag &
					     (RC_INPROG|RC_LOCKED|RC_WANTED))
					     && rp->rc_refcnt == 0
					     && ((rp->rc_flag & RC_REFCNT) ||
						 thisstamp > rp->rc_timestamp ||
						 rp->rc_acked == RC_ACK))
						nfsrc_freecache(rp);
				}
				mtx_unlock(&nfsrchash_table[i].mtx);
			}
		}
	}
	atomic_store_rel_int(&onethread, 0);
}
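
/*
 * Worked example of the histogram binning above: with HISTSIZE of 16
 * and a TCP timeout (tto) of, say, 300 seconds, an entry due to expire
 * 75 seconds from now lands in bucket 75 * 16 / 300 = 4.  The "force"
 * pass then accumulates bucket counts from 0 upward until more than
 * "force" (one quarter of nfsrc_tcphighwater) entries are covered, and
 * frees everything expiring within the corresponding fraction of the
 * timeout.
 */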

/*
 * Add a seqid# reference to the cache entry.
 */
APPLESTATIC void
nfsrvd_refcache(struct nfsrvcache *rp)
{
	struct mtx *mutex;

	if (rp == NULL)
		/* For NFSv4.1, there is no cache entry. */
		return;
	mutex = nfsrc_cachemutex(rp);
	mtx_lock(mutex);
	if (rp->rc_refcnt < 0)
		panic("nfs cache refcnt");
	rp->rc_refcnt++;
	mtx_unlock(mutex);
}

/*
 * Dereference a seqid# cache entry.
 */
APPLESTATIC void
nfsrvd_derefcache(struct nfsrvcache *rp)
{
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(rp);
	mtx_lock(mutex);
	if (rp->rc_refcnt <= 0)
		panic("nfs cache derefcnt");
	rp->rc_refcnt--;
	if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG)))
		nfsrc_freecache(rp);
	mtx_unlock(mutex);
}

/*
 * Calculate the length of the mbuf list and a checksum on the first up to
 * NFSRVCACHE_CHECKLEN bytes.
 */
static int
nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum)
{
	int len = 0, cklen;
	mbuf_t m;

	m = m1;
	while (m) {
		len += mbuf_len(m);
		m = mbuf_next(m);
	}
	cklen = (len > NFSRVCACHE_CHECKLEN) ? NFSRVCACHE_CHECKLEN : len;
	*cksum = in_cksum(m1, cklen);
	return (len);
}
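
/*
 * For example: a 60 byte request is checksummed in full, while a
 * 1500 byte request is checksummed over only its first
 * NFSRVCACHE_CHECKLEN (100) bytes.  The total length is still returned
 * and compared separately, so two requests match only when both the
 * overall length and the leading checksum agree.
 */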

/*
 * Mark a TCP connection that is seeing retries.  Should never happen for
 * NFSv4.
 */
static void
nfsrc_marksametcpconn(u_int64_t sockref)
{
}