Home | History | Annotate | Line # | Download | only in xen
      1 /*      $NetBSD: xengnt.c,v 1.41 2023/02/25 00:35:52 riastradh Exp $      */
      2 
      3 /*
      4  * Copyright (c) 2006 Manuel Bouyer.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  *
     26  */
     27 
     28 #include <sys/cdefs.h>
     29 __KERNEL_RCSID(0, "$NetBSD: xengnt.c,v 1.41 2023/02/25 00:35:52 riastradh Exp $");
     30 
     31 #include <sys/types.h>
     32 #include <sys/param.h>
     33 #include <sys/systm.h>
     34 #include <sys/kmem.h>
     35 #include <sys/queue.h>
     36 #include <sys/extent.h>
     37 #include <sys/kernel.h>
     38 #include <sys/mutex.h>
     39 #include <uvm/uvm.h>
     40 
     41 #include <xen/hypervisor.h>
     42 #include <xen/xen.h>
     43 #include <xen/granttables.h>
     44 
     45 #include "opt_xen.h"
     46 
/*
 * Debug tracing.  Uncomment the define below to make DPRINTF() expand to
 * printf; callers pass the whole argument list in an extra pair of
 * parentheses, e.g. DPRINTF(("fmt %d\n", x)).
 */
/* #define XENDEBUG */
#ifdef XENDEBUG
#define DPRINTF(x) printf x
#else
#define DPRINTF(x)
#endif

/*
 * External tools reserve first few grant table entries.
 * References 0..NR_RESERVED_ENTRIES are never placed on the free stack
 * (xengnt_more_entries() starts at NR_RESERVED_ENTRIES + 1).
 */
#define NR_RESERVED_ENTRIES 8

/*
 * Grant table version in use: 0 until xengnt_init() has run, then 1 or 2.
 */
int gnt_v = 0;
#define GNT_ISV1 (gnt_v == 1)
#define GNT_ISV2 (gnt_v == 2)
/* Current number of frames making up the grant table */
int gnt_nr_grant_frames;
/* Maximum number of frames that can make up the grant table */
int gnt_max_grant_frames;

/*
 * Stack of free grant references.  Slots below last_gnt_entry hold free
 * references; slots at and above it hold XENGNT_NO_ENTRY.
 */
grant_ref_t *gnt_entries;
/*
 * Top of the free stack: number of free references currently stored in
 * gnt_entries, which is also the index of the first empty slot.
 */
int last_gnt_entry;
/* empty entry in the list */
#define XENGNT_NO_ENTRY 0xffffffff

/* Number of grant entries (or v2 status words) that fit in one page */
#define NR_GRANT_ENTRIES_PER_PAGE_V1 (PAGE_SIZE / sizeof(grant_entry_v1_t))
#define NR_GRANT_ENTRIES_PER_PAGE_V2 (PAGE_SIZE / sizeof(grant_entry_v2_t))
#define NR_GRANT_ENTRIES_PER_PAGE \
    ((gnt_v == 1) ? NR_GRANT_ENTRIES_PER_PAGE_V1 : NR_GRANT_ENTRIES_PER_PAGE_V2)
#define NR_GRANT_STATUS_PER_PAGE (PAGE_SIZE / sizeof(grant_status_t))

/*
 * VM address of the grant table.  The same address is viewed as an array
 * of v1 or v2 entries depending on gnt_v.
 */
union {
	grant_entry_v1_t *gntt_v1;
	grant_entry_v2_t *gntt_v2;
	void *gntt;
} grant_table;

/* Number of grant status frames (v2 only)*/
int gnt_status_frames;

/* VM address of the status array (v2 only), indexed by grant reference */
grant_status_t *grant_status;
/*
 * Held while the free stack is manipulated and while grant/status frames
 * are being set up.
 */
kmutex_t grant_lock;

static grant_ref_t xengnt_get_entry(void);
static void xengnt_free_entry(grant_ref_t);
static int xengnt_more_entries(void);
static int xengnt_map_status(void);
static bool xengnt_finish_init(void);
     97 
     98 void
     99 xengnt_init(void)
    100 {
    101 	struct gnttab_query_size query;
    102 	int rc;
    103 	int nr_grant_entries;
    104 	int i;
    105 
    106 	/* first try to see which version we support */
    107 	struct gnttab_set_version gntversion;
    108 	gnt_v = gntversion.version = 2;
    109 	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gntversion, 1);
    110 	if (rc < 0 || gntversion.version != 2) {
    111 		aprint_debug("GNTTABOP_set_version 2 failed (%d), "
    112 		    "fall back to version 1\n", rc);
    113 		gnt_v = 1;
    114 	}
    115 
    116 	query.dom = DOMID_SELF;
    117 	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
    118 	if ((rc < 0) || (query.status != GNTST_okay))
    119 		gnt_max_grant_frames = 4; /* Legacy max number of frames */
    120 	else
    121 		gnt_max_grant_frames = query.max_nr_frames;
    122 
    123 	/*
    124 	 * Always allocate max number of grant frames, never expand in runtime
    125 	 */
    126 	gnt_nr_grant_frames = gnt_max_grant_frames;
    127 
    128 	nr_grant_entries =
    129 	    gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE;
    130 
    131 	grant_table.gntt = (void *)uvm_km_alloc(kernel_map,
    132 	    gnt_max_grant_frames * PAGE_SIZE, 0, UVM_KMF_VAONLY);
    133 	if (grant_table.gntt == NULL)
    134 		panic("xengnt_init() table no VM space");
    135 
    136 	gnt_entries = kmem_alloc((nr_grant_entries + 1) * sizeof(grant_ref_t),
    137 	    KM_SLEEP);
    138 	for (i = 0; i <= nr_grant_entries; i++)
    139 		gnt_entries[i] = XENGNT_NO_ENTRY;
    140 
    141 	if (GNT_ISV2) {
    142 		gnt_status_frames =
    143 		    round_page(nr_grant_entries * sizeof(grant_status_t)) / PAGE_SIZE;
    144 		grant_status = (void *)uvm_km_alloc(kernel_map,
    145 		    gnt_status_frames * PAGE_SIZE, 0, UVM_KMF_VAONLY);
    146 		if (grant_status == NULL)
    147 			panic("xengnt_init() status no VM space");
    148 	}
    149 
    150 	mutex_init(&grant_lock, MUTEX_DEFAULT, IPL_VM);
    151 
    152 	xengnt_finish_init();
    153 }
    154 
    155 /*
    156  * Resume grant table state
    157  */
    158 bool
    159 xengnt_resume(void)
    160 {
    161 	int rc;
    162 
    163 	struct gnttab_set_version gntversion;
    164 	KASSERT(gnt_v == 1 || gnt_v == 2);
    165 	gntversion.version = gnt_v;
    166 	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gntversion, 1);
    167 
    168 	if (GNT_ISV2) {
    169 		if (rc < 0 || gntversion.version != 2) {
    170 			panic("GNTTABOP_set_version 2 failed %d", rc);
    171 		}
    172 	} else {
    173 		if (rc == 0 && gntversion.version != 1) {
    174 			panic("GNTTABOP_set_version 1 failed");
    175 		}
    176 	}
    177 
    178 	return xengnt_finish_init();
    179 }
    180 
    181 static bool
    182 xengnt_finish_init(void)
    183 {
    184 	int previous_nr_grant_frames = gnt_nr_grant_frames;
    185 
    186 	last_gnt_entry = 0;
    187 	gnt_nr_grant_frames = 0;
    188 
    189 	mutex_enter(&grant_lock);
    190 	while (gnt_nr_grant_frames < previous_nr_grant_frames) {
    191 		if (xengnt_more_entries() != 0)
    192 			panic("xengnt_resume: can't restore grant frames");
    193 	}
    194 	if (GNT_ISV2)
    195 		xengnt_map_status();
    196 	mutex_exit(&grant_lock);
    197 	return true;
    198 }
    199 
    200 /*
    201  * Suspend grant table state
    202  */
    203 bool
    204 xengnt_suspend(void) {
    205 
    206 	int i;
    207 
    208 	mutex_enter(&grant_lock);
    209 	KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
    210 
    211 	for (i = 0; i < last_gnt_entry; i++) {
    212 		/* invalidate all grant entries (necessary for resume) */
    213 		gnt_entries[i] = XENGNT_NO_ENTRY;
    214 	}
    215 
    216 	/* Remove virtual => machine mapping for grant table */
    217 	pmap_kremove((vaddr_t)grant_table.gntt, gnt_nr_grant_frames * PAGE_SIZE);
    218 
    219 	if (GNT_ISV2) {
    220 		/* Remove virtual => machine mapping for status table */
    221 		pmap_kremove((vaddr_t)grant_status, gnt_status_frames * PAGE_SIZE);
    222 	}
    223 
    224 	pmap_update(pmap_kernel());
    225 	mutex_exit(&grant_lock);
    226 	return true;
    227 }
    228 
    229 /*
    230  * Get status frames and enter them into the VA space.
    231  */
    232 static int
    233 xengnt_map_status(void)
    234 {
    235 	uint64_t *pages;
    236 	size_t sz;
    237 	KASSERT(mutex_owned(&grant_lock));
    238 	KASSERT(GNT_ISV2);
    239 
    240 	sz = gnt_status_frames * sizeof(*pages);
    241 	pages = kmem_alloc(sz, KM_NOSLEEP);
    242 	if (pages == NULL)
    243 		return ENOMEM;
    244 
    245 #ifdef XENPV
    246 	gnttab_get_status_frames_t getstatus;
    247 	int err;
    248 
    249 	getstatus.dom = DOMID_SELF;
    250 	getstatus.nr_frames = gnt_status_frames;
    251 	set_xen_guest_handle(getstatus.frame_list, pages);
    252 
    253 	/*
    254 	 * get the status frames, and return the list of their virtual
    255 	 * addresses in 'pages'
    256 	 */
    257 	if ((err = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
    258 	    &getstatus, 1)) != 0)
    259 		panic("%s: get_status_frames failed: %d", __func__, err);
    260 	if (getstatus.status != GNTST_okay) {
    261 		aprint_error("%s: get_status_frames returned %d\n",
    262 		    __func__, getstatus.status);
    263 		kmem_free(pages, sz);
    264 		return ENOMEM;
    265 	}
    266 #else /* XENPV */
    267 	for (int i = 0; i < gnt_status_frames; i++) {
    268 		struct vm_page *pg;
    269 		struct xen_add_to_physmap xmap;
    270 
    271 		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO);
    272 		pages[i] = atop(uvm_vm_page_to_phys(pg));
    273 
    274 		xmap.domid = DOMID_SELF;
    275 		xmap.space = XENMAPSPACE_grant_table;
    276 		xmap.idx = i | XENMAPIDX_grant_table_status;
    277 		xmap.gpfn = pages[i];
    278 
    279 		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xmap) < 0)
    280 			panic("%s: Unable to add grant tables\n", __func__);
    281 	}
    282 #endif /* XENPV */
    283 	/*
    284 	 * map between status_table addresses and the machine addresses of
    285 	 * the status table frames
    286 	 */
    287 	for (int i = 0; i < gnt_status_frames; i++) {
    288 		pmap_kenter_ma(((vaddr_t)grant_status) + i * PAGE_SIZE,
    289 		    ((paddr_t)pages[i]) << PAGE_SHIFT,
    290 		    VM_PROT_WRITE, 0);
    291 	}
    292 	pmap_update(pmap_kernel());
    293 
    294 	kmem_free(pages, sz);
    295 	return 0;
    296 }
    297 
    298 /*
    299  * Add another page to the grant table
    300  * Returns 0 on success, ENOMEM on failure
    301  */
    302 static int
    303 xengnt_more_entries(void)
    304 {
    305 	gnttab_setup_table_t setup;
    306 	u_long *pages;
    307 	int nframes_new = gnt_nr_grant_frames + 1;
    308 	int i, start_gnt;
    309 	size_t sz;
    310 	KASSERT(mutex_owned(&grant_lock));
    311 
    312 	if (gnt_nr_grant_frames == gnt_max_grant_frames)
    313 		return ENOMEM;
    314 
    315 	sz = nframes_new * sizeof(*pages);
    316 	pages = kmem_alloc(sz, KM_NOSLEEP);
    317 	if (pages == NULL)
    318 		return ENOMEM;
    319 
    320 	if (xen_feature(XENFEAT_auto_translated_physmap)) {
    321 		/*
    322 		 * Note: Although we allocate space for the entire
    323 		 * table, in this mode we only update one entry at a
    324 		 * time.
    325 		 */
    326 		struct vm_page *pg;
    327 		struct xen_add_to_physmap xmap;
    328 
    329 		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO);
    330 		pages[gnt_nr_grant_frames] = atop(uvm_vm_page_to_phys(pg));
    331 
    332 		xmap.domid = DOMID_SELF;
    333 		xmap.space = XENMAPSPACE_grant_table;
    334 		xmap.idx = gnt_nr_grant_frames;
    335 		xmap.gpfn = pages[gnt_nr_grant_frames];
    336 
    337 		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xmap) < 0)
    338 			panic("%s: Unable to add grant frames\n", __func__);
    339 
    340 	} else {
    341 		setup.dom = DOMID_SELF;
    342 		setup.nr_frames = nframes_new;
    343 		set_xen_guest_handle(setup.frame_list, pages);
    344 
    345 		/*
    346 		 * setup the grant table, made of nframes_new frames
    347 		 * and return the list of their virtual addresses
    348 		 * in 'pages'
    349 		 */
    350 		if (HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0)
    351 			panic("%s: setup table failed", __func__);
    352 		if (setup.status != GNTST_okay) {
    353 			aprint_error("%s: setup table returned %d\n",
    354 			    __func__, setup.status);
    355 			kmem_free(pages, sz);
    356 			return ENOMEM;
    357 		}
    358 	}
    359 
    360 	DPRINTF(("xengnt_more_entries: map 0x%lx -> %p\n",
    361 	    pages[gnt_nr_grant_frames],
    362 	    (char *)grant_table + gnt_nr_grant_frames * PAGE_SIZE));
    363 
    364 	/*
    365 	 * map between grant_table addresses and the machine addresses of
    366 	 * the grant table frames
    367 	 */
    368 	pmap_kenter_ma(((vaddr_t)grant_table.gntt) + gnt_nr_grant_frames * PAGE_SIZE,
    369 	    ((paddr_t)pages[gnt_nr_grant_frames]) << PAGE_SHIFT,
    370 	    VM_PROT_WRITE, 0);
    371 	pmap_update(pmap_kernel());
    372 
    373 	/*
    374 	 * add the grant entries associated to the last grant table frame
    375 	 * and mark them as free. Prevent using the first grants (from 0 to 8)
    376 	 * since they are used by the tools.
    377 	 */
    378 	start_gnt = (gnt_nr_grant_frames * NR_GRANT_ENTRIES_PER_PAGE) <
    379 	            (NR_RESERVED_ENTRIES + 1) ?
    380 	            (NR_RESERVED_ENTRIES + 1) :
    381 	            (gnt_nr_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
    382 	for (i = start_gnt;
    383 	    i < nframes_new * NR_GRANT_ENTRIES_PER_PAGE;
    384 	    i++) {
    385 		KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
    386 		gnt_entries[last_gnt_entry] = i;
    387 		last_gnt_entry++;
    388 	}
    389 	gnt_nr_grant_frames = nframes_new;
    390 	kmem_free(pages, sz);
    391 	return 0;
    392 }
    393 
    394 /*
    395  * Returns a reference to the first free entry in grant table
    396  */
    397 static grant_ref_t
    398 xengnt_get_entry(void)
    399 {
    400 	grant_ref_t entry;
    401 	static struct timeval xengnt_nonmemtime;
    402 	static const struct timeval xengnt_nonmemintvl = {5,0};
    403 
    404 	KASSERT(mutex_owned(&grant_lock));
    405 
    406 	if (__predict_false(last_gnt_entry == 0)) {
    407 		if (ratecheck(&xengnt_nonmemtime, &xengnt_nonmemintvl))
    408 			printf("xengnt_get_entry: out of grant "
    409 			    "table entries\n");
    410 		return XENGNT_NO_ENTRY;
    411 	}
    412 	KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
    413 	last_gnt_entry--;
    414 	entry = gnt_entries[last_gnt_entry];
    415 	gnt_entries[last_gnt_entry] = XENGNT_NO_ENTRY;
    416 	KASSERT(entry != XENGNT_NO_ENTRY && entry > NR_RESERVED_ENTRIES);
    417 	KASSERT(last_gnt_entry >= 0);
    418 	KASSERT(last_gnt_entry <= gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
    419 	return entry;
    420 }
    421 
    422 /*
    423  * Mark the grant table entry as free
    424  */
    425 static void
    426 xengnt_free_entry(grant_ref_t entry)
    427 {
    428 	mutex_enter(&grant_lock);
    429 	KASSERT(entry > NR_RESERVED_ENTRIES);
    430 	KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
    431 	KASSERT(last_gnt_entry >= 0);
    432 	KASSERT(last_gnt_entry <= gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
    433 	gnt_entries[last_gnt_entry] = entry;
    434 	last_gnt_entry++;
    435 	mutex_exit(&grant_lock);
    436 }
    437 
    438 int
    439 xengnt_grant_access(domid_t dom, paddr_t ma, int ro, grant_ref_t *entryp)
    440 {
    441 	mutex_enter(&grant_lock);
    442 
    443 	*entryp = xengnt_get_entry();
    444 	if (__predict_false(*entryp == XENGNT_NO_ENTRY)) {
    445 		mutex_exit(&grant_lock);
    446 		return ENOMEM;
    447 	}
    448 
    449 	if (GNT_ISV2) {
    450 		grant_table.gntt_v2[*entryp].full_page.frame = ma >> PAGE_SHIFT;
    451 		grant_table.gntt_v2[*entryp].hdr.domid = dom;
    452 		/*
    453 		 * ensure that the above values reach global visibility
    454 		 * before permitting frame's access (done when we set flags)
    455 		 */
    456 		xen_wmb();
    457 		grant_table.gntt_v2[*entryp].hdr.flags =
    458 		    GTF_permit_access | (ro ? GTF_readonly : 0);
    459 	} else {
    460 		grant_table.gntt_v1[*entryp].frame = ma >> PAGE_SHIFT;
    461 		grant_table.gntt_v1[*entryp].domid = dom;
    462 		/*
    463 		* ensure that the above values reach global visibility
    464 		* before permitting frame's access (done when we set flags)
    465 		*/
    466 		xen_wmb();
    467 		grant_table.gntt_v1[*entryp].flags =
    468 		   GTF_permit_access | (ro ? GTF_readonly : 0);
    469 	}
    470 	mutex_exit(&grant_lock);
    471 	return 0;
    472 }
    473 
/*
 * 16-bit compare-and-exchange on *ptr, implemented with the x86 cmpxchgw
 * instruction prefixed by __LOCK_PREFIX.
 *
 * 'val' is the value expected in *ptr and 'newval' the replacement.  The
 * expected value is handed to the instruction in the accumulator (the "0"
 * constraint ties it to the "=a" output), and whatever the instruction
 * leaves in the accumulator is returned, truncated to 16 bits.  The caller
 * in xengnt_revoke_access() treats a return value equal to 'val' as "the
 * exchange took place".
 */
static inline uint16_t
xen_atomic_cmpxchg16(volatile uint16_t *ptr, uint16_t  val, uint16_t newval)
{
	unsigned long result;

	/*
	 * *ptr is listed as an input operand only; the "memory" clobber is
	 * what tells the compiler that memory may be modified.
	 */
	__asm volatile(__LOCK_PREFIX
	   "cmpxchgw %w1,%2"
	   :"=a" (result)
	   :"q"(newval), "m" (*ptr), "0" (val)
	   :"memory");

	return result;
}
    487 
    488 void
    489 xengnt_revoke_access(grant_ref_t entry)
    490 {
    491 	if (GNT_ISV2) {
    492 		grant_table.gntt_v2[entry].hdr.flags = 0;
    493 		xen_mb();	/* Concurrent access by hypervisor */
    494 
    495 		if (__predict_false(
    496 		    (grant_status[entry] & (GTF_reading|GTF_writing)) != 0)) {
    497 			printf("xengnt_revoke_access(%u): still in use\n",
    498 			    entry);
    499 		} else {
    500 
    501 			/*
    502 			 * The read of grant_status needs to have acquire
    503 			 * semantics.
    504 			 * Reads already have that on x86, so need only protect
    505 			 * against compiler reordering. May need full barrier
    506 			 * on other architectures.
    507 			 */
    508 			__insn_barrier();
    509 		}
    510 	} else {
    511 		uint16_t flags, nflags;
    512 
    513 		nflags = grant_table.gntt_v1[entry].flags;
    514 
    515 		do {
    516 		       if ((flags = nflags) & (GTF_reading|GTF_writing))
    517 			       panic("xengnt_revoke_access: still in use");
    518 		       nflags = xen_atomic_cmpxchg16(
    519 			    &grant_table.gntt_v1[entry].flags, flags, 0);
    520 		} while (nflags != flags);
    521 
    522 	}
    523 	xengnt_free_entry(entry);
    524 }
    525 
    526 int
    527 xengnt_status(grant_ref_t entry)
    528 {
    529 	if (GNT_ISV2)
    530 		return grant_status[entry] & (GTF_reading|GTF_writing);
    531 	else
    532 		return (grant_table.gntt_v1[entry].flags & (GTF_reading|GTF_writing));
    533 }
    534