uvm_readahead.c revision 1.1.2.9 1 1.1.2.9 yamt /* $NetBSD: uvm_readahead.c,v 1.1.2.9 2005/11/18 08:44:55 yamt Exp $ */
2 1.1.2.1 yamt
3 1.1.2.1 yamt /*-
4 1.1.2.1 yamt * Copyright (c)2003, 2005 YAMAMOTO Takashi,
5 1.1.2.1 yamt * All rights reserved.
6 1.1.2.1 yamt *
7 1.1.2.1 yamt * Redistribution and use in source and binary forms, with or without
8 1.1.2.1 yamt * modification, are permitted provided that the following conditions
9 1.1.2.1 yamt * are met:
10 1.1.2.1 yamt * 1. Redistributions of source code must retain the above copyright
11 1.1.2.1 yamt * notice, this list of conditions and the following disclaimer.
12 1.1.2.1 yamt * 2. Redistributions in binary form must reproduce the above copyright
13 1.1.2.1 yamt * notice, this list of conditions and the following disclaimer in the
14 1.1.2.1 yamt * documentation and/or other materials provided with the distribution.
15 1.1.2.1 yamt *
16 1.1.2.1 yamt * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 1.1.2.1 yamt * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 1.1.2.1 yamt * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 1.1.2.1 yamt * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 1.1.2.1 yamt * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 1.1.2.1 yamt * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 1.1.2.1 yamt * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 1.1.2.1 yamt * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 1.1.2.1 yamt * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 1.1.2.1 yamt * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 1.1.2.1 yamt * SUCH DAMAGE.
27 1.1.2.1 yamt */
28 1.1.2.1 yamt
29 1.1.2.1 yamt #include <sys/cdefs.h>
30 1.1.2.9 yamt __KERNEL_RCSID(0, "$NetBSD: uvm_readahead.c,v 1.1.2.9 2005/11/18 08:44:55 yamt Exp $");
31 1.1.2.1 yamt
32 1.1.2.1 yamt #include <sys/param.h>
33 1.1.2.1 yamt #include <sys/pool.h>
34 1.1.2.1 yamt
35 1.1.2.1 yamt #include <uvm/uvm.h>
36 1.1.2.1 yamt #include <uvm/uvm_readahead.h>
37 1.1.2.1 yamt
/*
 * DPRINTF: debug output helper.
 *
 * => takes a single, fully parenthesized printf argument list,
 *    eg. DPRINTF(("%s: x=%d\n", __func__, x));
 * => expands to nothing (the arguments are not evaluated) unless
 *    READAHEAD_DEBUG is defined.
 */
#if !defined(READAHEAD_DEBUG)
#define	DPRINTF(a)	/* nothing */
#else /* !defined(READAHEAD_DEBUG) */
#define	DPRINTF(a)	printf a
#endif /* !defined(READAHEAD_DEBUG) */
43 1.1.2.6 yamt
/*
 * uvm_ractx: read-ahead context.
 *
 * the window/next fields are only meaningful once RA_VALID is set
 * in ra_flags.
 */

/* ra_flags bit: the context has been initialized by a request. */
#define	RA_VALID	1

struct uvm_ractx {
	int	ra_flags;	/* RA_* flags */
	off_t	ra_winstart;	/* offset at which the window starts */
	size_t	ra_winsize;	/* size of the window, in bytes */
	off_t	ra_next;	/* offset at which the next read-ahead starts */
};
55 1.1.2.1 yamt
/*
 * read-ahead tunables, all expressed in multiples of MAXPHYS.
 *
 * XXX tune
 * XXX should consider the amount of memory in the system.
 * XXX should consider the speed of the underlying device.
 */

/* initial window size */
#define	RA_WINSIZE_INIT		MAXPHYS

/* max window size */
#define	RA_WINSIZE_MAX		(MAXPHYS * 8)

/*
 * fixed window size used for SEQUENTIAL hint.
 * (the identifier is spelled "SEQENTIAL"; uvm_ra_request uses that spelling.)
 */
#define	RA_WINSIZE_SEQENTIAL	RA_WINSIZE_MAX

/* min size to start i/o */
#define	RA_MINSIZE		(MAXPHYS * 2)

/* read-ahead i/o chunk size; ra_startio asserts that it is a power of 2 */
#define	RA_IOCHUNK		MAXPHYS
68 1.1.2.1 yamt
69 1.1.2.1 yamt static off_t ra_startio(struct uvm_object *, off_t, size_t);
70 1.1.2.1 yamt static struct uvm_ractx *ra_allocctx(void);
71 1.1.2.1 yamt static void ra_freectx(struct uvm_ractx *);
72 1.1.2.1 yamt
73 1.1.2.1 yamt POOL_INIT(ractx_pool, sizeof(struct uvm_ractx), 0, 0, 0, "ractx",
74 1.1.2.1 yamt &pool_allocator_nointr);
75 1.1.2.1 yamt
76 1.1.2.1 yamt static struct uvm_ractx *
77 1.1.2.1 yamt ra_allocctx(void)
78 1.1.2.1 yamt {
79 1.1.2.1 yamt
80 1.1.2.1 yamt return pool_get(&ractx_pool, PR_NOWAIT);
81 1.1.2.1 yamt }
82 1.1.2.1 yamt
83 1.1.2.1 yamt static void
84 1.1.2.1 yamt ra_freectx(struct uvm_ractx *ra)
85 1.1.2.1 yamt {
86 1.1.2.1 yamt
87 1.1.2.1 yamt pool_put(&ractx_pool, ra);
88 1.1.2.1 yamt }
89 1.1.2.1 yamt
90 1.1.2.5 yamt /*
91 1.1.2.5 yamt * ra_startio: start i/o for read-ahead.
92 1.1.2.5 yamt *
93 1.1.2.5 yamt * => start i/o for each RA_IOCHUNK sized chunk.
94 1.1.2.5 yamt * => return offset to which we started i/o.
95 1.1.2.5 yamt */
96 1.1.2.5 yamt
97 1.1.2.1 yamt static off_t
98 1.1.2.1 yamt ra_startio(struct uvm_object *uobj, off_t off, size_t sz)
99 1.1.2.1 yamt {
100 1.1.2.1 yamt const off_t endoff = off + sz;
101 1.1.2.1 yamt
102 1.1.2.6 yamt DPRINTF(("%s: uobj=%p, off=%" PRIu64 ", endoff=%" PRIu64 "\n",
103 1.1.2.6 yamt __func__, uobj, off, endoff));
104 1.1.2.1 yamt off = trunc_page(off);
105 1.1.2.1 yamt while (off < endoff) {
106 1.1.2.5 yamt const size_t chunksize = RA_IOCHUNK;
107 1.1.2.1 yamt int error;
108 1.1.2.1 yamt size_t donebytes;
109 1.1.2.1 yamt int npages;
110 1.1.2.1 yamt int orignpages;
111 1.1.2.1 yamt size_t bytelen;
112 1.1.2.1 yamt
113 1.1.2.1 yamt KASSERT((chunksize & (chunksize - 1)) == 0);
114 1.1.2.1 yamt KASSERT((off & PAGE_MASK) == 0);
115 1.1.2.1 yamt bytelen = ((off + chunksize) & -(off_t)chunksize) - off;
116 1.1.2.6 yamt DPRINTF(("%s: off=%" PRIu64 ", bytelen=%zu\n",
117 1.1.2.6 yamt __func__, off, bytelen));
118 1.1.2.1 yamt KASSERT((bytelen & PAGE_MASK) == 0);
119 1.1.2.1 yamt npages = orignpages = bytelen >> PAGE_SHIFT;
120 1.1.2.1 yamt KASSERT(npages != 0);
121 1.1.2.1 yamt simple_lock(&uobj->vmobjlock);
122 1.1.2.1 yamt error = (*uobj->pgops->pgo_get)(uobj, off, NULL,
123 1.1.2.1 yamt &npages, 0, VM_PROT_READ, 0, 0);
124 1.1.2.1 yamt if (error) {
125 1.1.2.5 yamt if (error != EINVAL) { /* maybe past EOF */
126 1.1.2.6 yamt DPRINTF(("%s: error=%d\n", __func__, error));
127 1.1.2.1 yamt }
128 1.1.2.1 yamt break;
129 1.1.2.1 yamt }
130 1.1.2.4 yamt KASSERT(orignpages == npages);
131 1.1.2.1 yamt donebytes = orignpages << PAGE_SHIFT;
132 1.1.2.1 yamt off += donebytes;
133 1.1.2.1 yamt }
134 1.1.2.1 yamt
135 1.1.2.1 yamt return off;
136 1.1.2.1 yamt }
137 1.1.2.1 yamt
138 1.1.2.1 yamt /* ------------------------------------------------------------ */
139 1.1.2.1 yamt
140 1.1.2.1 yamt struct uvm_ractx *
141 1.1.2.9 yamt uvm_ra_allocctx(void)
142 1.1.2.1 yamt {
143 1.1.2.1 yamt struct uvm_ractx *ra;
144 1.1.2.1 yamt
145 1.1.2.1 yamt ra = ra_allocctx();
146 1.1.2.1 yamt if (ra != NULL) {
147 1.1.2.1 yamt ra->ra_flags = 0;
148 1.1.2.1 yamt }
149 1.1.2.1 yamt
150 1.1.2.1 yamt return ra;
151 1.1.2.1 yamt }
152 1.1.2.1 yamt
153 1.1.2.1 yamt void
154 1.1.2.1 yamt uvm_ra_freectx(struct uvm_ractx *ra)
155 1.1.2.1 yamt {
156 1.1.2.1 yamt
157 1.1.2.1 yamt KASSERT(ra != NULL);
158 1.1.2.1 yamt ra_freectx(ra);
159 1.1.2.1 yamt }
160 1.1.2.1 yamt
161 1.1.2.5 yamt /*
162 1.1.2.5 yamt * uvm_ra_request: start i/o for read-ahead if appropriate.
163 1.1.2.5 yamt *
164 1.1.2.5 yamt * => called by filesystems when [reqoff, reqoff+reqsize) is requested.
165 1.1.2.5 yamt */
166 1.1.2.5 yamt
167 1.1.2.1 yamt void
168 1.1.2.9 yamt uvm_ra_request(struct uvm_ractx *ra, int advice, struct uvm_object *uobj,
169 1.1.2.1 yamt off_t reqoff, size_t reqsize)
170 1.1.2.1 yamt {
171 1.1.2.1 yamt
172 1.1.2.9 yamt if (ra == NULL || advice == UVM_ADV_RANDOM) {
173 1.1.2.1 yamt return;
174 1.1.2.1 yamt }
175 1.1.2.1 yamt
176 1.1.2.9 yamt /*
177 1.1.2.9 yamt * XXX needs locking? maybe.
178 1.1.2.9 yamt * but the worst effect is merely a bad read-ahead.
179 1.1.2.9 yamt */
180 1.1.2.2 yamt
181 1.1.2.9 yamt if (advice == UVM_ADV_SEQUENTIAL) {
182 1.1.2.5 yamt
183 1.1.2.5 yamt /*
184 1.1.2.5 yamt * always do read-ahead with a large window.
185 1.1.2.5 yamt */
186 1.1.2.5 yamt
187 1.1.2.9 yamt if ((ra->ra_flags & RA_VALID) == 0) {
188 1.1.2.9 yamt ra->ra_winstart = ra->ra_next = 0;
189 1.1.2.9 yamt ra->ra_flags |= RA_VALID;
190 1.1.2.9 yamt }
191 1.1.2.2 yamt if (reqoff <= ra->ra_winstart) {
192 1.1.2.2 yamt ra->ra_next = reqoff;
193 1.1.2.2 yamt }
194 1.1.2.2 yamt ra->ra_winsize = RA_WINSIZE_SEQENTIAL;
195 1.1.2.2 yamt goto do_readahead;
196 1.1.2.2 yamt }
197 1.1.2.2 yamt
198 1.1.2.5 yamt /*
199 1.1.2.9 yamt * a request with UVM_ADV_NORMAL hint. (ie. no hint)
200 1.1.2.5 yamt *
201 1.1.2.5 yamt * we keep a sliding window in order to determine:
202 1.1.2.5 yamt * - if the previous read-ahead was successful or not.
203 1.1.2.5 yamt * - how many bytes to read-ahead.
204 1.1.2.5 yamt */
205 1.1.2.5 yamt
206 1.1.2.5 yamt /*
207 1.1.2.5 yamt * if it's the first request for this context,
208 1.1.2.5 yamt * initialize context and return.
209 1.1.2.5 yamt */
210 1.1.2.5 yamt
211 1.1.2.1 yamt if ((ra->ra_flags & RA_VALID) == 0) {
212 1.1.2.1 yamt initialize:
213 1.1.2.1 yamt ra->ra_winstart = ra->ra_next = reqoff + reqsize;
214 1.1.2.1 yamt ra->ra_winsize = RA_WINSIZE_INIT;
215 1.1.2.1 yamt ra->ra_flags |= RA_VALID;
216 1.1.2.9 yamt goto done;
217 1.1.2.1 yamt }
218 1.1.2.1 yamt
219 1.1.2.5 yamt /*
220 1.1.2.5 yamt * if it isn't in our window,
221 1.1.2.5 yamt * initialize context and return.
222 1.1.2.5 yamt * (read-ahead miss)
223 1.1.2.5 yamt */
224 1.1.2.5 yamt
225 1.1.2.1 yamt if (reqoff < ra->ra_winstart ||
226 1.1.2.1 yamt ra->ra_winstart + ra->ra_winsize < reqoff) {
227 1.1.2.1 yamt goto initialize;
228 1.1.2.1 yamt }
229 1.1.2.1 yamt
230 1.1.2.1 yamt /*
231 1.1.2.5 yamt * it's in our window. (read-ahead hit)
232 1.1.2.5 yamt * - start read-ahead i/o if appropriate.
233 1.1.2.5 yamt * - advance and enlarge window.
234 1.1.2.1 yamt */
235 1.1.2.1 yamt
236 1.1.2.2 yamt do_readahead:
237 1.1.2.5 yamt
238 1.1.2.5 yamt /*
239 1.1.2.5 yamt * don't bother to read-ahead behind current request.
240 1.1.2.5 yamt */
241 1.1.2.5 yamt
242 1.1.2.1 yamt if (reqoff > ra->ra_next) {
243 1.1.2.1 yamt ra->ra_next = reqoff;
244 1.1.2.1 yamt }
245 1.1.2.1 yamt
246 1.1.2.5 yamt /*
247 1.1.2.5 yamt * try to make [reqoff, reqoff+ra_winsize) in-core.
248 1.1.2.8 yamt * note that [reqoff, ra_next) is considered already done.
249 1.1.2.5 yamt */
250 1.1.2.5 yamt
251 1.1.2.1 yamt if (reqoff + ra->ra_winsize > ra->ra_next) {
252 1.1.2.1 yamt off_t raoff = MAX(reqoff, ra->ra_next);
253 1.1.2.1 yamt size_t rasize = reqoff + ra->ra_winsize - ra->ra_next;
254 1.1.2.1 yamt
255 1.1.2.9 yamt #if defined(DIAGNOSTIC)
256 1.1.2.9 yamt if (rasize > RA_WINSIZE_MAX) {
257 1.1.2.9 yamt
258 1.1.2.9 yamt /*
259 1.1.2.9 yamt * shouldn't happen as far as we're protected by
260 1.1.2.9 yamt * kernel_lock.
261 1.1.2.9 yamt */
262 1.1.2.9 yamt
263 1.1.2.9 yamt printf("%s: corrupted context", __func__);
264 1.1.2.9 yamt rasize = RA_WINSIZE_MAX;
265 1.1.2.9 yamt }
266 1.1.2.9 yamt #endif /* defined(DIAGNOSTIC) */
267 1.1.2.9 yamt
268 1.1.2.5 yamt /*
269 1.1.2.5 yamt * issue read-ahead only if we can start big enough i/o.
270 1.1.2.5 yamt * otherwise we end up with a stream of small i/o.
271 1.1.2.5 yamt */
272 1.1.2.5 yamt
273 1.1.2.1 yamt if (rasize >= RA_MINSIZE) {
274 1.1.2.1 yamt ra->ra_next = ra_startio(uobj, raoff, rasize);
275 1.1.2.1 yamt }
276 1.1.2.1 yamt }
277 1.1.2.1 yamt
278 1.1.2.1 yamt /*
279 1.1.2.5 yamt * update window.
280 1.1.2.5 yamt *
281 1.1.2.5 yamt * enlarge window by reqsize, so that it grows in a predictable manner
282 1.1.2.5 yamt * regardless of the size of each read(2).
283 1.1.2.1 yamt */
284 1.1.2.1 yamt
285 1.1.2.1 yamt ra->ra_winstart = reqoff + reqsize;
286 1.1.2.1 yamt ra->ra_winsize = MIN(RA_WINSIZE_MAX, ra->ra_winsize + reqsize);
287 1.1.2.9 yamt
288 1.1.2.9 yamt done:;
289 1.1.2.1 yamt }
290