Home | History | Annotate | Line # | Download | only in include
pte.h revision 1.21.2.2
      1  1.21.2.2    rmind /*	$NetBSD: pte.h,v 1.21.2.2 2011/03/05 20:50:41 rmind Exp $	*/
      2      1.16     yamt 
      3      1.16     yamt /*
      4      1.16     yamt  * Copyright (c) 2001 Wasabi Systems, Inc.
      5      1.16     yamt  * All rights reserved.
      6      1.16     yamt  *
      7      1.16     yamt  * Written by Frank van der Linden for Wasabi Systems, Inc.
      8      1.16     yamt  *
      9      1.16     yamt  * Redistribution and use in source and binary forms, with or without
     10      1.16     yamt  * modification, are permitted provided that the following conditions
     11      1.16     yamt  * are met:
     12      1.16     yamt  * 1. Redistributions of source code must retain the above copyright
     13      1.16     yamt  *    notice, this list of conditions and the following disclaimer.
     14      1.16     yamt  * 2. Redistributions in binary form must reproduce the above copyright
     15      1.16     yamt  *    notice, this list of conditions and the following disclaimer in the
     16      1.16     yamt  *    documentation and/or other materials provided with the distribution.
     17      1.16     yamt  * 3. All advertising materials mentioning features or use of this software
     18      1.16     yamt  *    must display the following acknowledgement:
     19      1.16     yamt  *      This product includes software developed for the NetBSD Project by
     20      1.16     yamt  *      Wasabi Systems, Inc.
     21      1.16     yamt  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
     22      1.16     yamt  *    or promote products derived from this software without specific prior
     23      1.16     yamt  *    written permission.
     24      1.16     yamt  *
     25      1.16     yamt  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
     26      1.16     yamt  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27      1.16     yamt  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28      1.16     yamt  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
     29      1.16     yamt  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30      1.16     yamt  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31      1.16     yamt  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32      1.16     yamt  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33      1.16     yamt  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34      1.16     yamt  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35      1.16     yamt  * POSSIBILITY OF SUCH DAMAGE.
     36      1.16     yamt  */
     37      1.11  thorpej 
     38      1.10      mrg /*
     39      1.10      mrg  * Copyright (c) 1997 Charles D. Cranor and Washington University.
     40       1.1      cgd  * All rights reserved.
     41       1.1      cgd  *
     42       1.1      cgd  * Redistribution and use in source and binary forms, with or without
     43       1.1      cgd  * modification, are permitted provided that the following conditions
     44       1.1      cgd  * are met:
     45       1.1      cgd  * 1. Redistributions of source code must retain the above copyright
     46       1.1      cgd  *    notice, this list of conditions and the following disclaimer.
     47       1.1      cgd  * 2. Redistributions in binary form must reproduce the above copyright
     48       1.1      cgd  *    notice, this list of conditions and the following disclaimer in the
     49       1.1      cgd  *    documentation and/or other materials provided with the distribution.
     50      1.10      mrg  *
     51      1.10      mrg  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     52      1.10      mrg  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     53      1.10      mrg  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     54      1.10      mrg  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     55      1.10      mrg  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     56      1.10      mrg  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     57      1.10      mrg  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     58      1.10      mrg  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     59      1.10      mrg  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     60      1.10      mrg  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     61       1.1      cgd  */
     62       1.1      cgd 
     63       1.1      cgd /*
     64      1.10      mrg  * pte.h rewritten by chuck based on the jolitz version, plus random
     65      1.10      mrg  * info on the pentium and other processors found on the net.   the
     66      1.10      mrg  * goal of this rewrite is to provide enough documentation on the MMU
     67      1.10      mrg  * hardware that the reader will be able to understand it without having
     68      1.10      mrg  * to refer to a hardware manual.
     69       1.1      cgd  */
     70       1.1      cgd 
     71       1.3   andrew #ifndef _I386_PTE_H_
     72       1.3   andrew #define _I386_PTE_H_
     73      1.17   bouyer #ifdef _KERNEL_OPT
     74      1.17   bouyer #include "opt_xen.h"
     75      1.17   bouyer #endif
     76       1.3   andrew 
     77      1.10      mrg /*
     78      1.20   dyoung  * i386 MMU hardware structure (without PAE extension):
     79      1.10      mrg  *
     80      1.10      mrg  * the i386 MMU is a two-level MMU which maps 4GB of virtual memory.
     81      1.10      mrg  * the pagesize is 4K (4096 [0x1000] bytes), although newer pentium
     82      1.10      mrg  * processors can support a 4MB pagesize as well.
     83      1.10      mrg  *
     84      1.10      mrg  * the first level table (segment table?) is called a "page directory"
     85      1.10      mrg  * and it contains 1024 page directory entries (PDEs).   each PDE is
     86      1.10      mrg  * 4 bytes (an int), so a PD fits in a single 4K page.   this page is
     87      1.10      mrg  * the page directory page (PDP).  each PDE in a PDP maps 4MB of space
     88      1.10      mrg  * (1024 * 4MB = 4GB).   a PDE contains the physical address of the
     89      1.10      mrg  * second level table: the page table.   or, if 4MB pages are being used,
     90      1.10      mrg  * then the PDE contains the PA of the 4MB page being mapped.
     91      1.10      mrg  *
     92      1.10      mrg  * a page table consists of 1024 page table entries (PTEs).  each PTE is
     93      1.10      mrg  * 4 bytes (an int), so a page table also fits in a single 4K page.  a
     94      1.10      mrg  * 4K page being used as a page table is called a page table page (PTP).
     95      1.10      mrg  * each PTE in a PTP maps one 4K page (1024 * 4K = 4MB).   a PTE contains
     96      1.10      mrg  * the physical address of the page it maps and some flag bits (described
     97      1.10      mrg  * below).
     98      1.10      mrg  *
     99      1.10      mrg  * the processor has a special register, "cr3", which points to the
     100      1.10      mrg  * PDP which is currently controlling the mappings of the virtual
    101      1.10      mrg  * address space.
    102      1.10      mrg  *
    103      1.10      mrg  * the following picture shows the translation process for a 4K page:
    104      1.10      mrg  *
    105      1.10      mrg  * %cr3 register [PA of PDP]
    106      1.10      mrg  *      |
    107      1.10      mrg  *      |
    108      1.10      mrg  *      |   bits <31-22> of VA         bits <21-12> of VA   bits <11-0>
    109      1.10      mrg  *      |   index the PDP (0 - 1023)   index the PTP        are the page offset
    110      1.10      mrg  *      |         |                           |                  |
    111      1.10      mrg  *      |         v                           |                  |
    112      1.10      mrg  *      +--->+----------+                     |                  |
    113      1.10      mrg  *           | PD Page  |   PA of             v                  |
    114      1.10      mrg  *           |          |---PTP-------->+------------+           |
    115      1.10      mrg  *           | 1024 PDE |               | page table |--PTE--+   |
    116      1.10      mrg  *           | entries  |               | (aka PTP)  |       |   |
    117      1.10      mrg  *           +----------+               | 1024 PTE   |       |   |
    118      1.10      mrg  *                                      | entries    |       |   |
    119      1.10      mrg  *                                      +------------+       |   |
    120      1.10      mrg  *                                                           |   |
    121      1.10      mrg  *                                                bits <31-12>   bits <11-0>
    122      1.10      mrg  *                                                p h y s i c a l  a d d r
    123      1.10      mrg  *
    124      1.10      mrg  * the i386 caches PTEs in a TLB.   it is important to flush out old
     125      1.10      mrg  * TLB mappings when making a change to a mapping.   writing to the
    126      1.10      mrg  * %cr3 will flush the entire TLB.    newer processors also have an
    127      1.10      mrg  * instruction that will invalidate the mapping of a single page (which
     128      1.10      mrg  * is useful if you are changing a single mapping because it preserves
     129      1.10      mrg  * all the other cached TLB entries).
    130      1.10      mrg  *
     131      1.10      mrg  * as shown above, bits 31-12 of the PTE contain the PA of the page being mapped.
    132      1.10      mrg  * the rest of the PTE is defined as follows:
    133      1.10      mrg  *   bit#	name	use
    134      1.10      mrg  *   11		n/a	available for OS use, hardware ignores it
    135      1.10      mrg  *   10		n/a	available for OS use, hardware ignores it
    136      1.10      mrg  *   9		n/a	available for OS use, hardware ignores it
    137      1.10      mrg  *   8		G	global bit (see discussion below)
    138      1.10      mrg  *   7		PS	page size [for PDEs] (0=4k, 1=4M <if supported>)
    139      1.10      mrg  *   6		D	dirty (modified) page
    140      1.10      mrg  *   5		A	accessed (referenced) page
    141      1.10      mrg  *   4		PCD	cache disable
     142      1.10      mrg  *   3		PWT	page write-through (cache)
    143      1.10      mrg  *   2		U/S	user/supervisor bit (0=supervisor only, 1=both u&s)
    144      1.10      mrg  *   1		R/W	read/write bit (0=read only, 1=read-write)
    145      1.10      mrg  *   0		P	present (valid)
    146      1.10      mrg  *
    147      1.10      mrg  * notes:
    148      1.10      mrg  *  - PS is only supported on newer processors
    149      1.10      mrg  *  - PTEs with the G bit are global in the sense that they are not
    150      1.10      mrg  *    flushed from the TLB when %cr3 is written (to flush, use the
    151      1.10      mrg  *    "flush single page" instruction).   this is only supported on
    152      1.10      mrg  *    newer processors.    this bit can be used to keep the kernel's
    153      1.10      mrg  *    TLB entries around while context switching.   since the kernel
    154      1.10      mrg  *    is mapped into all processes at the same place it does not make
    155      1.10      mrg  *    sense to flush these entries when switching from one process'
    156      1.10      mrg  *    pmap to another.
    157      1.17   bouyer  *
     158      1.20   dyoung  * The PAE extension extends the size of the PTE to 64 bits (52-bit physical
    159      1.17   bouyer  * address) and is compatible with the amd64 PTE format. The first level
    160  1.21.2.1    rmind  * maps 2M, the second 1G, so a third level page table is introduced to
    161      1.17   bouyer  * map the 4GB virtual address space. This PD has only 4 entries.
    162  1.21.2.1    rmind  * We can't use recursive mapping at level 3 to map the PD pages, as this
    163  1.21.2.1    rmind  * would eat one GB of address space. In addition, Xen imposes restrictions
    164      1.17   bouyer  * on the entries we put in the L3 page (for example, the page pointed to by
    165      1.17   bouyer  * the last slot can't be shared among different L3 pages), which makes
    166      1.17   bouyer  * handling this L3 page in the same way we do for L2 on i386 (or L4 on amd64)
    167      1.17   bouyer  * difficult. For most things we'll just pretend to have only 2 levels,
    168      1.17   bouyer  * with the 2 high bits of the L2 index being in fact the index in the
    169      1.17   bouyer  * L3.
    170      1.10      mrg  */
    171      1.10      mrg 
    172      1.11  thorpej #if !defined(_LOCORE)
    173      1.10      mrg 
    174      1.10      mrg /*
    175      1.10      mrg  * here we define the data types for PDEs and PTEs
    176      1.10      mrg  */
    177      1.17   bouyer #ifdef PAE
    178      1.17   bouyer typedef uint64_t pd_entry_t;		/* PDE */
    179      1.17   bouyer typedef uint64_t pt_entry_t;		/* PTE */
    180      1.17   bouyer #else
    181      1.15    perry typedef uint32_t pd_entry_t;		/* PDE */
    182      1.15    perry typedef uint32_t pt_entry_t;		/* PTE */
    183      1.17   bouyer #endif
    184      1.10      mrg 
    185       1.1      cgd #endif
    186       1.1      cgd 
    187      1.10      mrg /*
     188      1.10      mrg  * now we define various constants and macros for playing with virtual addresses
    189      1.10      mrg  */
    190      1.10      mrg 
    191      1.17   bouyer #ifdef PAE
    192      1.17   bouyer #define	L1_SHIFT	12
    193      1.17   bouyer #define	L2_SHIFT	21
    194      1.17   bouyer #define	L3_SHIFT	30
    195      1.17   bouyer #define	NBPD_L1		(1ULL << L1_SHIFT) /* # bytes mapped by L1 ent (4K) */
    196      1.17   bouyer #define	NBPD_L2		(1ULL << L2_SHIFT) /* # bytes mapped by L2 ent (2MB) */
    197      1.17   bouyer #define	NBPD_L3		(1ULL << L3_SHIFT) /* # bytes mapped by L3 ent (1GB) */
    198      1.17   bouyer 
    199      1.17   bouyer #define	L3_MASK		0xc0000000
    200      1.17   bouyer #define	L2_REALMASK	0x3fe00000
    201      1.17   bouyer #define	L2_MASK		(L2_REALMASK | L3_MASK)
    202      1.17   bouyer #define	L1_MASK		0x001ff000
    203      1.17   bouyer 
    204      1.17   bouyer #define	L3_FRAME	(L3_MASK)
    205      1.17   bouyer #define	L2_FRAME	(L3_FRAME | L2_MASK)
    206      1.17   bouyer #define	L1_FRAME	(L2_FRAME|L1_MASK)
    207      1.17   bouyer 
    208      1.17   bouyer #define	PG_FRAME	0x000ffffffffff000ULL /* page frame mask */
    209      1.17   bouyer #define	PG_LGFRAME	0x000fffffffe00000ULL /* large (2MB) page frame mask */
    210      1.17   bouyer 
     211      1.17   bouyer /* macros to split our "extended" L2 index: l2tol3() yields the real L3 index (the bits above the L2 index), l2tol2() the real L2 index (the low bits) */
     212      1.17   bouyer #define l2tol3(idx)	((idx) >> (L3_SHIFT - L2_SHIFT))
     213      1.17   bouyer #define l2tol2(idx)	((idx) & (L2_REALMASK >>  L2_SHIFT))
    214  1.21.2.2    rmind 
    215      1.17   bouyer #else /* PAE */
    216  1.21.2.2    rmind 
    217      1.17   bouyer #define	L1_SHIFT	12
    218      1.16     yamt #define	L2_SHIFT	22
    219      1.21      jym #define	NBPD_L1		(1UL << L1_SHIFT) /* # bytes mapped by L1 ent (4K) */
    220      1.21      jym #define	NBPD_L2		(1UL << L2_SHIFT) /* # bytes mapped by L2 ent (4MB) */
    221      1.16     yamt 
    222      1.16     yamt #define L2_MASK		0xffc00000
    223      1.16     yamt #define L1_MASK		0x003ff000
    224      1.16     yamt 
    225      1.16     yamt #define L2_FRAME	(L2_MASK)
    226      1.16     yamt #define L1_FRAME	(L2_FRAME|L1_MASK)
    227       1.1      cgd 
    228      1.17   bouyer #define	PG_FRAME	0xfffff000	/* page frame mask */
    229      1.17   bouyer #define	PG_LGFRAME	0xffc00000	/* large (4MB) page frame mask */
    230      1.17   bouyer 
    231      1.17   bouyer #endif /* PAE */
    232      1.10      mrg /*
    233      1.10      mrg  * here we define the bits of the PDE/PTE, as described above:
    234      1.10      mrg  *
    235      1.10      mrg  * XXXCDC: need to rename these (PG_u == ugly).
    236      1.10      mrg  */
    237      1.10      mrg 
     238      1.10      mrg #define	PG_V		0x00000001	/* valid entry (P, bit 0) */
     239      1.10      mrg #define	PG_RO		0x00000000	/* read-only page (R/W bit clear) */
     240      1.10      mrg #define	PG_RW		0x00000002	/* read-write page (R/W, bit 1) */
     241      1.10      mrg #define	PG_u		0x00000004	/* user accessible page (U/S, bit 2) */
     242      1.14      chs #define	PG_PROT		0x00000806	/* all protection bits (PG_RW|PG_u|0x800) */
     243  1.21.2.2    rmind #define PG_WT		0x00000008	/* write through (PWT, bit 3) */
     244  1.21.2.2    rmind #define	PG_N		0x00000010	/* non-cacheable (PCD, bit 4) */
     245       1.4  mycroft #define	PG_U		0x00000020	/* has been used (A, bit 5) */
     246       1.4  mycroft #define	PG_M		0x00000040	/* has been modified (D, bit 6) */
     247  1.21.2.2    rmind #define PG_PAT		0x00000080	/* PAT (on pte); same bit value as PG_PS */
     248  1.21.2.2    rmind #define PG_PS		0x00000080	/* 4MB page size (2MB for PAE); same bit value as PG_PAT */
     249      1.10      mrg #define PG_G		0x00000100	/* global, don't TLB flush (G, bit 8) */
     250      1.10      mrg #define PG_AVAIL1	0x00000200	/* ignored by hardware (bit 9) */
     251      1.10      mrg #define PG_AVAIL2	0x00000400	/* ignored by hardware (bit 10) */
     252      1.10      mrg #define PG_AVAIL3	0x00000800	/* ignored by hardware (bit 11) */
     253  1.21.2.2    rmind #define PG_LGPAT	0x00001000	/* PAT on large pages (bit 12) */
    254       1.1      cgd 
    255      1.10      mrg /*
    256      1.10      mrg  * various short-hand protection codes
    257      1.10      mrg  */
    258      1.10      mrg 
    259      1.10      mrg #define	PG_KR		0x00000000	/* kernel read-only */
    260      1.10      mrg #define	PG_KW		0x00000002	/* kernel read-write */
    261      1.19   cegger 
    262      1.19   cegger #ifdef PAE
    263  1.21.2.2    rmind #define	PG_NX		0x8000000000000000ULL /* No-execute */
    264      1.19   cegger #else
    265      1.16     yamt #define	PG_NX		0		/* dummy */
    266      1.19   cegger #endif
    267      1.10      mrg 
    268  1.21.2.2    rmind #include <x86/pte.h>
    269       1.3   andrew 
    270       1.3   andrew #endif /* _I386_PTE_H_ */
    271