pte.h revision 1.22 1 1.22 jld /* $NetBSD: pte.h,v 1.22 2010/04/06 20:43:57 jld Exp $ */
2 1.16 yamt
3 1.16 yamt /*
4 1.16 yamt * Copyright (c) 2001 Wasabi Systems, Inc.
5 1.16 yamt * All rights reserved.
6 1.16 yamt *
7 1.16 yamt * Written by Frank van der Linden for Wasabi Systems, Inc.
8 1.16 yamt *
9 1.16 yamt * Redistribution and use in source and binary forms, with or without
10 1.16 yamt * modification, are permitted provided that the following conditions
11 1.16 yamt * are met:
12 1.16 yamt * 1. Redistributions of source code must retain the above copyright
13 1.16 yamt * notice, this list of conditions and the following disclaimer.
14 1.16 yamt * 2. Redistributions in binary form must reproduce the above copyright
15 1.16 yamt * notice, this list of conditions and the following disclaimer in the
16 1.16 yamt * documentation and/or other materials provided with the distribution.
17 1.16 yamt * 3. All advertising materials mentioning features or use of this software
18 1.16 yamt * must display the following acknowledgement:
19 1.16 yamt * This product includes software developed for the NetBSD Project by
20 1.16 yamt * Wasabi Systems, Inc.
21 1.16 yamt * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 1.16 yamt * or promote products derived from this software without specific prior
23 1.16 yamt * written permission.
24 1.16 yamt *
25 1.16 yamt * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 1.16 yamt * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 1.16 yamt * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 1.16 yamt * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 1.16 yamt * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 1.16 yamt * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 1.16 yamt * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 1.16 yamt * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 1.16 yamt * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 1.16 yamt * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 1.16 yamt * POSSIBILITY OF SUCH DAMAGE.
36 1.16 yamt */
37 1.11 thorpej
38 1.10 mrg /*
39 1.10 mrg *
40 1.10 mrg * Copyright (c) 1997 Charles D. Cranor and Washington University.
41 1.1 cgd * All rights reserved.
42 1.1 cgd *
43 1.1 cgd * Redistribution and use in source and binary forms, with or without
44 1.1 cgd * modification, are permitted provided that the following conditions
45 1.1 cgd * are met:
46 1.1 cgd * 1. Redistributions of source code must retain the above copyright
47 1.1 cgd * notice, this list of conditions and the following disclaimer.
48 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright
49 1.1 cgd * notice, this list of conditions and the following disclaimer in the
50 1.1 cgd * documentation and/or other materials provided with the distribution.
51 1.1 cgd * 3. All advertising materials mentioning features or use of this software
52 1.10 mrg * must display the following acknowledgment:
53 1.10 mrg * This product includes software developed by Charles D. Cranor and
54 1.10 mrg * Washington University.
55 1.10 mrg * 4. The name of the author may not be used to endorse or promote products
56 1.10 mrg * derived from this software without specific prior written permission.
57 1.10 mrg *
58 1.10 mrg * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
59 1.10 mrg * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
60 1.10 mrg * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
61 1.10 mrg * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
62 1.10 mrg * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
63 1.10 mrg * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
64 1.10 mrg * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
65 1.10 mrg * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
66 1.10 mrg * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
67 1.10 mrg * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68 1.1 cgd */
69 1.1 cgd
70 1.1 cgd /*
71 1.10 mrg * pte.h rewritten by chuck based on the jolitz version, plus random
72 1.10 mrg * info on the pentium and other processors found on the net. the
73 1.10 mrg * goal of this rewrite is to provide enough documentation on the MMU
74 1.10 mrg * hardware that the reader will be able to understand it without having
75 1.10 mrg * to refer to a hardware manual.
76 1.1 cgd */
77 1.1 cgd
78 1.3 andrew #ifndef _I386_PTE_H_
79 1.3 andrew #define _I386_PTE_H_
80 1.17 bouyer #ifdef _KERNEL_OPT
81 1.17 bouyer #include "opt_xen.h"
82 1.17 bouyer #endif
83 1.3 andrew
84 1.10 mrg /*
85 1.20 dyoung * i386 MMU hardware structure (without PAE extension):
86 1.10 mrg *
87 1.10 mrg * the i386 MMU is a two-level MMU which maps 4GB of virtual memory.
88 1.10 mrg * the pagesize is 4K (4096 [0x1000] bytes), although newer pentium
89 1.10 mrg * processors can support a 4MB pagesize as well.
90 1.10 mrg *
91 1.10 mrg * the first level table (segment table?) is called a "page directory"
92 1.10 mrg * and it contains 1024 page directory entries (PDEs). each PDE is
93 1.10 mrg * 4 bytes (an int), so a PD fits in a single 4K page. this page is
94 1.10 mrg * the page directory page (PDP). each PDE in a PDP maps 4MB of space
95 1.10 mrg * (1024 * 4MB = 4GB). a PDE contains the physical address of the
96 1.10 mrg * second level table: the page table. or, if 4MB pages are being used,
97 1.10 mrg * then the PDE contains the PA of the 4MB page being mapped.
98 1.10 mrg *
99 1.10 mrg * a page table consists of 1024 page table entries (PTEs). each PTE is
100 1.10 mrg * 4 bytes (an int), so a page table also fits in a single 4K page. a
101 1.10 mrg * 4K page being used as a page table is called a page table page (PTP).
102 1.10 mrg * each PTE in a PTP maps one 4K page (1024 * 4K = 4MB). a PTE contains
103 1.10 mrg * the physical address of the page it maps and some flag bits (described
104 1.10 mrg * below).
105 1.10 mrg *
106 1.10 mrg * the processor has a special register, "cr3", which points to the
107 1.10 mrg * PDP which is currently controlling the mappings of the virtual
108 1.10 mrg * address space.
109 1.10 mrg *
110 1.10 mrg * the following picture shows the translation process for a 4K page:
111 1.10 mrg *
112 1.10 mrg *       %cr3 register [PA of PDP]
113 1.10 mrg *        |
114 1.10 mrg *        |
115 1.10 mrg *        |   bits <31-22> of VA         bits <21-12> of VA   bits <11-0>
116 1.10 mrg *        |   index the PDP (0 - 1023)   index the PTP        are the page offset
117 1.10 mrg *        |         |                           |                  |
118 1.10 mrg *        |         v                           |                  |
119 1.10 mrg *        +--->+----------+                     |                  |
120 1.10 mrg *             | PD Page  |   PA of             v                  |
121 1.10 mrg *             |          |---PTP-------->+------------+           |
122 1.10 mrg *             | 1024 PDE |               | page table |--PTE--+   |
123 1.10 mrg *             | entries  |               | (aka PTP)  |       |   |
124 1.10 mrg *             +----------+               | 1024 PTE   |       |   |
125 1.10 mrg *                                        | entries    |       |   |
126 1.10 mrg *                                        +------------+       |   |
127 1.10 mrg *                                                             |   |
128 1.10 mrg *                                                  bits <31-12>   bits <11-0>
129 1.10 mrg *                                                  p h y s i c a l   a d d r
130 1.10 mrg *
131 1.10 mrg * the i386 caches PTEs in a TLB. it is important to flush out old
132 1.10 mrg * TLB mappings when making a change to a mapping. writing to the
133 1.10 mrg * %cr3 will flush the entire TLB. newer processors also have an
134 1.10 mrg * instruction that will invalidate the mapping of a single page (which
135 1.10 mrg * is useful if you are changing a single mapping because it preserves
136 1.10 mrg * all the other cached TLB entries).
137 1.10 mrg *
138 1.10 mrg * as shown, bits 31-12 of the PTE contain the PA of the page being mapped.
139 1.10 mrg * the rest of the PTE is defined as follows:
140 1.10 mrg * bit# name use
141 1.10 mrg * 11 n/a available for OS use, hardware ignores it
142 1.10 mrg * 10 n/a available for OS use, hardware ignores it
143 1.10 mrg * 9 n/a available for OS use, hardware ignores it
144 1.10 mrg * 8 G global bit (see discussion below)
145 1.10 mrg * 7 PS page size [for PDEs] (0=4k, 1=4M <if supported>)
146 1.10 mrg * 6 D dirty (modified) page
147 1.10 mrg * 5 A accessed (referenced) page
148 1.10 mrg * 4 PCD cache disable
149 1.10 mrg * 3 PWT page write-through (cache)
150 1.10 mrg * 2 U/S user/supervisor bit (0=supervisor only, 1=both u&s)
151 1.10 mrg * 1 R/W read/write bit (0=read only, 1=read-write)
152 1.10 mrg * 0 P present (valid)
153 1.10 mrg *
154 1.10 mrg * notes:
155 1.10 mrg * - PS is only supported on newer processors
156 1.10 mrg * - PTEs with the G bit are global in the sense that they are not
157 1.10 mrg * flushed from the TLB when %cr3 is written (to flush, use the
158 1.10 mrg * "flush single page" instruction). this is only supported on
159 1.10 mrg * newer processors. this bit can be used to keep the kernel's
160 1.10 mrg * TLB entries around while context switching. since the kernel
161 1.10 mrg * is mapped into all processes at the same place it does not make
162 1.10 mrg * sense to flush these entries when switching from one process'
163 1.10 mrg * pmap to another.
164 1.17 bouyer *
165 1.20 dyoung * The PAE extension extends the size of the PTE to 64 bits (52-bit physical
166 1.17 bouyer * address) and is compatible with the amd64 PTE format. The first level
167 1.22 jld * maps 2M, the second 1G, so a third level page table is introduced to
168 1.17 bouyer * map the 4GB virtual address space. This PD has only 4 entries.
169 1.22 jld * We can't use recursive mapping at level 3 to map the PD pages, as this
170 1.22 jld * would eat one GB of address space. In addition, Xen imposes restrictions
171 1.17 bouyer * on the entries we put in the L3 page (for example, the page pointed to by
172 1.17 bouyer * the last slot can't be shared among different L3 pages), which makes
173 1.17 bouyer * handling this L3 page in the same way we do for L2 on i386 (or L4 on amd64)
174 1.17 bouyer * difficult. For most things we'll just pretend to have only 2 levels,
175 1.17 bouyer * with the 2 high bits of the L2 index being in fact the index in the
176 1.17 bouyer * L3.
177 1.10 mrg */
178 1.10 mrg
179 1.11 thorpej #if !defined(_LOCORE)
180 1.10 mrg
181 1.10 mrg /*
182 1.10 mrg * here we define the data types for PDEs and PTEs: 64 bits wide when PAE is defined, 32 bits wide otherwise. they are only declared when _LOCORE is not defined.
183 1.10 mrg */
184 1.17 bouyer #ifdef PAE
185 1.17 bouyer typedef uint64_t pd_entry_t; /* PDE (page directory entry), 64-bit PAE format */
186 1.17 bouyer typedef uint64_t pt_entry_t; /* PTE (page table entry), 64-bit PAE format */
187 1.17 bouyer #else /* !PAE */
188 1.15 perry typedef uint32_t pd_entry_t; /* PDE (page directory entry), 32-bit format */
189 1.15 perry typedef uint32_t pt_entry_t; /* PTE (page table entry), 32-bit format */
190 1.17 bouyer #endif /* PAE */
191 1.10 mrg
192 1.1 cgd #endif /* !_LOCORE */
193 1.1 cgd
194 1.10 mrg /*
195 1.10 mrg * now we define various constants and macros for playing with virtual addresses
196 1.10 mrg */
197 1.10 mrg
198 1.17 bouyer #ifdef PAE
199 1.17 bouyer #define L1_SHIFT 12 /* lowest VA bit of the L1 (page table) index */
200 1.17 bouyer #define L2_SHIFT 21 /* lowest VA bit of the L2 (page directory) index */
201 1.17 bouyer #define L3_SHIFT 30 /* lowest VA bit of the L3 index */
202 1.17 bouyer #define NBPD_L1 (1ULL << L1_SHIFT) /* # bytes mapped by L1 ent (4K) */
203 1.17 bouyer #define NBPD_L2 (1ULL << L2_SHIFT) /* # bytes mapped by L2 ent (2MB) */
204 1.17 bouyer #define NBPD_L3 (1ULL << L3_SHIFT) /* # bytes mapped by L3 ent (1GB) */
205 1.17 bouyer
206 1.17 bouyer #define L3_MASK 0xc0000000 /* VA bits 31-30: L3 index */
207 1.17 bouyer #define L2_REALMASK 0x3fe00000 /* VA bits 29-21: real L2 index */
208 1.17 bouyer #define L2_MASK (L2_REALMASK | L3_MASK) /* VA bits 31-21: "extended" L2 index (L3 index + real L2 index) */
209 1.17 bouyer #define L1_MASK 0x001ff000 /* VA bits 20-12: L1 index */
210 1.17 bouyer
211 1.17 bouyer #define L3_FRAME (L3_MASK) /* VA bits 31-30 */
212 1.17 bouyer #define L2_FRAME (L3_FRAME | L2_MASK) /* VA bits 31-21 */
213 1.17 bouyer #define L1_FRAME (L2_FRAME|L1_MASK) /* VA bits 31-12: everything above the page offset */
214 1.17 bouyer
215 1.17 bouyer #define PG_FRAME 0x000ffffffffff000ULL /* page frame mask (PTE bits 51-12) */
216 1.17 bouyer #define PG_LGFRAME 0x000fffffffe00000ULL /* large (2MB) page frame mask (PDE bits 51-21) */
217 1.17 bouyer
218 1.17 bouyer /* macros to get real L2 and L3 index, from our "extended" L2 index */
219 1.17 bouyer #define l2tol3(idx) ((idx) >> (L3_SHIFT - L2_SHIFT)) /* L3 index: drop the low 9 (real L2) bits */
220 1.17 bouyer #define l2tol2(idx) ((idx) & (L2_REALMASK >> L2_SHIFT)) /* real L2 index: keep only the low 9 bits */
221 1.17 bouyer #else /* !PAE */
222 1.17 bouyer #define L1_SHIFT 12 /* lowest VA bit of the L1 (page table) index */
223 1.16 yamt #define L2_SHIFT 22 /* lowest VA bit of the L2 (page directory) index */
224 1.21 jym #define NBPD_L1 (1UL << L1_SHIFT) /* # bytes mapped by L1 ent (4K) */
225 1.21 jym #define NBPD_L2 (1UL << L2_SHIFT) /* # bytes mapped by L2 ent (4MB) */
226 1.16 yamt
227 1.16 yamt #define L2_MASK 0xffc00000 /* VA bits 31-22: L2 (PDP) index */
228 1.16 yamt #define L1_MASK 0x003ff000 /* VA bits 21-12: L1 (PTP) index */
229 1.16 yamt
230 1.16 yamt #define L2_FRAME (L2_MASK) /* VA bits 31-22 */
231 1.16 yamt #define L1_FRAME (L2_FRAME|L1_MASK) /* VA bits 31-12: everything above the page offset */
232 1.1 cgd
233 1.17 bouyer #define PG_FRAME 0xfffff000 /* page frame mask (PTE bits 31-12) */
234 1.17 bouyer #define PG_LGFRAME 0xffc00000 /* large (4MB) page frame mask (PDE bits 31-22) */
235 1.17 bouyer
236 1.17 bouyer #endif /* PAE */
237 1.10 mrg /*
238 1.10 mrg * here we define the bits of the PDE/PTE, as described above:
239 1.10 mrg *
240 1.10 mrg * XXXCDC: need to rename these (PG_u == ugly).
241 1.10 mrg */
242 1.10 mrg
243 1.10 mrg #define PG_V 0x00000001 /* valid entry (P, bit 0) */
244 1.10 mrg #define PG_RO 0x00000000 /* read-only page (PG_RW not set) */
245 1.10 mrg #define PG_RW 0x00000002 /* read-write page (R/W, bit 1) */
246 1.10 mrg #define PG_u 0x00000004 /* user accessible page (U/S, bit 2) */
247 1.14 chs #define PG_PROT 0x00000806 /* all protection bits (PG_RW | PG_u | PG_AVAIL3) */
248 1.4 mycroft #define PG_N 0x00000018 /* non-cacheable (PCD | PWT, bits 4 and 3) */
249 1.4 mycroft #define PG_U 0x00000020 /* has been used (A, bit 5) */
250 1.4 mycroft #define PG_M 0x00000040 /* has been modified (D, bit 6) */
251 1.10 mrg #define PG_PS 0x00000080 /* large page size (PS, bit 7): 4MB, or 2MB with PAE */
252 1.10 mrg #define PG_G 0x00000100 /* global (G, bit 8), don't TLB flush on %cr3 write */
253 1.10 mrg #define PG_AVAIL1 0x00000200 /* bit 9, ignored by hardware */
254 1.10 mrg #define PG_AVAIL2 0x00000400 /* bit 10, ignored by hardware */
255 1.10 mrg #define PG_AVAIL3 0x00000800 /* bit 11, ignored by hardware */
256 1.1 cgd
257 1.10 mrg /*
258 1.10 mrg * various short-hand protection codes
259 1.10 mrg */
260 1.10 mrg
261 1.10 mrg #define PG_KR 0x00000000 /* kernel read-only (same value as PG_RO) */
262 1.10 mrg #define PG_KW 0x00000002 /* kernel read-write (same value as PG_RW) */
263 1.19 cegger
264 1.19 cegger #ifdef PAE
265 1.19 cegger #define PG_NX 0x8000000000000000 /* No-execute (bit 63 of the 64-bit entry) */
266 1.19 cegger #else /* !PAE */
267 1.16 yamt #define PG_NX 0 /* dummy: 32-bit entries have no bit 63, so OR-ing this in is a no-op */
268 1.19 cegger #endif /* PAE */
269 1.10 mrg
270 1.10 mrg /*
271 1.10 mrg * page protection exception bits
272 1.10 mrg */
273 1.10 mrg
274 1.10 mrg #define PGEX_P 0x01 /* bit 0: protection violation (vs. no mapping) */
275 1.10 mrg #define PGEX_W 0x02 /* bit 1: exception during a write cycle */
276 1.10 mrg #define PGEX_U 0x04 /* bit 2: exception while in user mode (upl) */
277 1.3 andrew
278 1.3 andrew #endif /* _I386_PTE_H_ */
279