/*	$NetBSD: uvm_readahead.c,v 1.6.2.1 2010/10/22 07:22:57 uebayasi Exp $	*/

/*-
 * Copyright (c)2003, 2005, 2009 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * uvm_object read-ahead
 *
 * TODO:
 *	- tune.
 *	- handle multiple streams.
 *	- find a better way to deal with PGO_LOCKED pager requests.
 *	  (currently just ignored)
 *	- consider the amount of memory in the system.
 *	- consider the speed of the underlying device.
 *	- consider filesystem block size / block layout.
 */
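
/*
 * usage sketch (illustrative only; the calling code below is hypothetical
 * and not part of this file):  a caller allocates one context per
 * sequential stream (e.g. per vnode or per open file), feeds every read
 * request into it, and frees it when the stream goes away.
 *
 *	struct uvm_ractx *ra;
 *
 *	ra = uvm_ra_allocctx();		(may return NULL; that is fine,
 *					 uvm_ra_request accepts NULL)
 *	...
 *	mutex_enter(&uobj->vmobjlock);
 *	uvm_ra_request(ra, advice, uobj, reqoff, reqsize);
 *	... do the actual pgo_get/uiomove with the object still locked ...
 *	mutex_exit(&uobj->vmobjlock);
 *	...
 *	if (ra != NULL)
 *		uvm_ra_freectx(ra);
 */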

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_readahead.c,v 1.6.2.1 2010/10/22 07:22:57 uebayasi Exp $");

#include <sys/param.h>
#include <sys/pool.h>

#include <uvm/uvm.h>
#include <uvm/uvm_readahead.h>

#if defined(READAHEAD_DEBUG)
#define	DPRINTF(a)	printf a
#else /* defined(READAHEAD_DEBUG) */
#define	DPRINTF(a)	/* nothing */
#endif /* defined(READAHEAD_DEBUG) */

/*
 * uvm_ractx: read-ahead context.
 */

struct uvm_ractx {
	int ra_flags;
#define	RA_VALID	1
	off_t ra_winstart;	/* window start offset */
	size_t ra_winsize;	/* window size */
	off_t ra_next;		/* next offset to read-ahead */
};

#if defined(sun2) || defined(sun3)
/* XXX: on sun2 and sun3 MAXPHYS is 0xe000 */
#undef MAXPHYS
#define MAXPHYS		0x8000	/* XXX */
#endif

#define	RA_WINSIZE_INIT	MAXPHYS			/* initial window size */
#define	RA_WINSIZE_MAX	(MAXPHYS * 8)		/* max window size */
#define	RA_WINSIZE_SEQENTIAL	RA_WINSIZE_MAX	/* fixed window size used for
						   SEQUENTIAL hint */
#define	RA_MINSIZE	(MAXPHYS * 2)		/* min size to start i/o */
#define	RA_IOCHUNK	MAXPHYS			/* read-ahead i/o chunk size */
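
/*
 * for illustration: on a port where MAXPHYS is 64KB (a common but
 * port-dependent value; note the sun2/sun3 override above), these work
 * out to a 64KB initial window, a 512KB maximum window, 64KB i/o chunks,
 * and a 128KB minimum span before any read-ahead i/o is issued.
 */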

static off_t ra_startio(struct uvm_object *, off_t, size_t);
static struct uvm_ractx *ra_allocctx(void);
static void ra_freectx(struct uvm_ractx *);

static struct pool_cache ractx_cache;

/*
 * uvm_ra_init: initialize readahead module.
 */

void
uvm_ra_init(void)
{

	pool_cache_bootstrap(&ractx_cache, sizeof(struct uvm_ractx), 0, 0, 0,
	    "ractx", NULL, IPL_NONE, NULL, NULL, NULL);
}

static struct uvm_ractx *
ra_allocctx(void)
{

	return pool_cache_get(&ractx_cache, PR_NOWAIT);
}

static void
ra_freectx(struct uvm_ractx *ra)
{

	pool_cache_put(&ractx_cache, ra);
}

/*
 * ra_startio: start i/o for read-ahead.
 *
 * => start i/o for each RA_IOCHUNK sized chunk.
 * => return offset to which we started i/o.
 */

static off_t
ra_startio(struct uvm_object *uobj, off_t off, size_t sz)
{
	const off_t endoff = off + sz;

	DPRINTF(("%s: uobj=%p, off=%" PRIu64 ", endoff=%" PRIu64 "\n",
	    __func__, uobj, off, endoff));
	off = trunc_page(off);
	while (off < endoff) {
		const size_t chunksize = RA_IOCHUNK;
		int error;
		size_t donebytes;
		int npages;
		int orignpages;
		size_t bytelen;

		KASSERT((chunksize & (chunksize - 1)) == 0);
		KASSERT((off & PAGE_MASK) == 0);
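		/*
		 * bytelen is the distance from off up to the next
		 * chunksize boundary; e.g. off=0x3000, chunksize=0x10000
		 * gives bytelen=0xd000.  after the first partial chunk,
		 * every iteration transfers a full chunk.
		 */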
		bytelen = ((off + chunksize) & -(off_t)chunksize) - off;
		KASSERT((bytelen & PAGE_MASK) == 0);
		npages = orignpages = bytelen >> PAGE_SHIFT;
		KASSERT(npages != 0);

		/*
		 * use UVM_ADV_RANDOM to avoid recursion.
		 */

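		/*
		 * note: the pager drops uobj->vmobjlock before returning
		 * (PGO_LOCKED is not set), which is why the lock is
		 * re-taken at the top of each iteration.
		 */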
		mutex_enter(&uobj->vmobjlock);
		error = (*uobj->pgops->pgo_get)(uobj, off, NULL,
		    &npages, 0, VM_PROT_READ, UVM_ADV_RANDOM, 0);
		DPRINTF(("%s:  off=%" PRIu64 ", bytelen=%zu -> %d\n",
		    __func__, off, bytelen, error));
		if (error != 0 && error != EBUSY) {
			if (error != EINVAL) { /* maybe past EOF */
				DPRINTF(("%s: error=%d\n", __func__, error));
			}
			break;
		}
		KASSERT(orignpages == npages);
		donebytes = orignpages << PAGE_SHIFT;
		off += donebytes;
	}

	return off;
}

/* ------------------------------------------------------------ */

/*
 * uvm_ra_allocctx: allocate a context.
 */

struct uvm_ractx *
uvm_ra_allocctx(void)
{
	struct uvm_ractx *ra;

	ra = ra_allocctx();
	if (ra != NULL) {
		ra->ra_flags = 0;
	}

	return ra;
}

/*
 * uvm_ra_freectx: free a context.
 */

void
uvm_ra_freectx(struct uvm_ractx *ra)
{

	KASSERT(ra != NULL);
	ra_freectx(ra);
}

/*
 * uvm_ra_request: update a read-ahead context and start i/o if appropriate.
 *
 * => called when [reqoff, reqoff+reqsize) is requested.
 * => object must be locked by caller, will return locked.
 */

void
uvm_ra_request(struct uvm_ractx *ra, int advice, struct uvm_object *uobj,
    off_t reqoff, size_t reqsize)
{

	KASSERT(mutex_owned(&uobj->vmobjlock));

	if (ra == NULL || advice == UVM_ADV_RANDOM) {
		return;
	}

	if (advice == UVM_ADV_SEQUENTIAL) {

		/*
		 * always do read-ahead with a large window.
		 */

		if ((ra->ra_flags & RA_VALID) == 0) {
			ra->ra_winstart = ra->ra_next = 0;
			ra->ra_flags |= RA_VALID;
		}
		if (reqoff < ra->ra_winstart) {
			ra->ra_next = reqoff;
		}
		ra->ra_winsize = RA_WINSIZE_SEQENTIAL;
		goto do_readahead;
	}

	/*
	 * a request with UVM_ADV_NORMAL hint.  (i.e. no hint)
	 *
	 * we keep a sliding window in order to determine:
	 *	- whether the previous read-ahead was successful.
	 *	- how many bytes to read ahead.
	 */
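
	/*
	 * worked example (assuming MAXPHYS is 64KB, hence a 64KB initial
	 * window, a 512KB maximum window and a 128KB RA_MINSIZE), for a
	 * strictly sequential reader issuing 64KB requests:
	 *
	 *	read [0, 64K):     first request; window := [64K, 128K),
	 *	                   no read-ahead yet.
	 *	read [64K, 128K):  hit; only 64KB could be read ahead,
	 *	                   which is below RA_MINSIZE, so no i/o;
	 *	                   window := [128K, 256K).
	 *	read [128K, 192K): hit; read ahead [128K, 256K);
	 *	                   window := [192K, 384K).
	 *	read [192K, 256K): hit; read ahead [256K, 384K);
	 *	                   window := [256K, 512K).
	 */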

	/*
	 * if it's the first request for this context,
	 * initialize context and return.
	 */

	if ((ra->ra_flags & RA_VALID) == 0) {
initialize:
		ra->ra_winstart = ra->ra_next = reqoff + reqsize;
		ra->ra_winsize = RA_WINSIZE_INIT;
		ra->ra_flags |= RA_VALID;
		goto done;
	}

	/*
	 * if it isn't in our window,
	 * initialize context and return.
	 * (read-ahead miss)
	 */

	if (reqoff < ra->ra_winstart ||
	    ra->ra_winstart + ra->ra_winsize < reqoff) {

		/*
		 * ... unless we seem to be reading the same chunk repeatedly.
		 *
		 * XXX should have some margin?
		 */

		if (reqoff + reqsize == ra->ra_winstart) {
			DPRINTF(("%s: %p: same block: off=%" PRIu64
			    ", size=%zd, winstart=%" PRIu64 "\n",
			    __func__, ra, reqoff, reqsize, ra->ra_winstart));
			goto done;
		}
		goto initialize;
	}

	/*
	 * it's in our window. (read-ahead hit)
	 *	- start read-ahead i/o if appropriate.
	 *	- advance and enlarge window.
	 */

do_readahead:

	/*
	 * don't bother to read ahead behind the current request.
	 */

	if (reqoff > ra->ra_next) {
		ra->ra_next = reqoff;
	}

	/*
	 * try to make [reqoff, reqoff+ra_winsize) in-core.
	 * note that [reqoff, ra_next) is considered already done.
	 */

	if (reqoff + ra->ra_winsize > ra->ra_next) {
		off_t raoff = MAX(reqoff, ra->ra_next);
		size_t rasize = reqoff + ra->ra_winsize - ra->ra_next;
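
		/*
		 * ra_next was clamped to >= reqoff above, so raoff is
		 * ra_next and rasize covers the not-yet-read-ahead part
		 * [ra_next, reqoff + ra_winsize).
		 */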

#if defined(DIAGNOSTIC)
		if (rasize > RA_WINSIZE_MAX) {
			printf("%s: corrupted context\n", __func__);
			rasize = RA_WINSIZE_MAX;
		}
#endif /* defined(DIAGNOSTIC) */

		/*
		 * issue read-ahead only if we can start a big enough i/o.
		 * otherwise we end up with a stream of small i/o.
		 */

		if (rasize >= RA_MINSIZE) {
			off_t next;

			mutex_exit(&uobj->vmobjlock);
			next = ra_startio(uobj, raoff, rasize);
			mutex_enter(&uobj->vmobjlock);
			ra->ra_next = next;
		}
	}

	/*
	 * update window.
	 *
	 * enlarge the window by reqsize, so that it grows in a predictable
	 * manner regardless of the size of each read(2).
	 */

	ra->ra_winstart = reqoff + reqsize;
	ra->ra_winsize = MIN(RA_WINSIZE_MAX, ra->ra_winsize + reqsize);

done:;
}

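/*
 * uvm_readahead: explicitly start read-ahead i/o for [off, off + size).
 *
 * => object must be unlocked by the caller; ra_startio() takes
 *    uobj->vmobjlock itself.
 */
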
int
uvm_readahead(struct uvm_object *uobj, off_t off, off_t size)
{

	/*
	 * don't allow too much read-ahead.
	 */
	if (size > RA_WINSIZE_MAX) {
		size = RA_WINSIZE_MAX;
	}
	ra_startio(uobj, off, size);
	return 0;
}