Home | History | Annotate | Line # | Download | only in xen
      1 /*	$NetBSD: xen_machdep.c,v 1.29 2023/10/17 10:24:11 riastradh Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2006 Manuel Bouyer.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  *
     26  */
     27 
     28 /*
     29  *
     30  * Copyright (c) 2004 Christian Limpach.
     31  * All rights reserved.
     32  *
     33  * Redistribution and use in source and binary forms, with or without
     34  * modification, are permitted provided that the following conditions
     35  * are met:
     36  * 1. Redistributions of source code must retain the above copyright
     37  *    notice, this list of conditions and the following disclaimer.
     38  * 2. Redistributions in binary form must reproduce the above copyright
     39  *    notice, this list of conditions and the following disclaimer in the
     40  *    documentation and/or other materials provided with the distribution.
     41  *
     42  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     43  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     44  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     45  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     46  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     47  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     48  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     49  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     50  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     51  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     52  */
     53 
     54 
     55 #include <sys/cdefs.h>
     56 __KERNEL_RCSID(0, "$NetBSD: xen_machdep.c,v 1.29 2023/10/17 10:24:11 riastradh Exp $");
     57 
     58 #include "opt_xen.h"
     59 
     60 #include <sys/param.h>
     61 #include <sys/systm.h>
     62 #include <sys/boot_flag.h>
     63 #include <sys/conf.h>
     64 #include <sys/disk.h>
     65 #include <sys/device.h>
     66 #include <sys/mount.h>
     67 #include <sys/reboot.h>
     68 #include <sys/timetc.h>
     69 #include <sys/sysctl.h>
     70 #include <sys/pmf.h>
     71 #include <sys/xcall.h>
     72 
     73 #include <dev/cons.h>
     74 
     75 #include <xen/intr.h>
     76 #include <xen/hypervisor.h>
     77 #include <xen/shutdown_xenbus.h>
     78 #include <xen/include/public/version.h>
     79 
     80 #include <machine/pmap_private.h>
     81 
/*
 * DPRINTK(x): debug trace routed through printk().  The argument must be a
 * complete, parenthesized printf-style argument list, because the macro
 * pastes it directly after the function name:
 *	DPRINTK(("value %d\n", v));
 * The alternative definition under "#if 0" (expanding to nothing) is
 * currently disabled, so these traces are always compiled in.
 */
#define DPRINTK(x) printk x
#if 0
#define DPRINTK(x)
#endif
     86 
/*
 * DPRINTF(a): debug trace via printf(), compiled in only when DEBUG_GEOM is
 * defined; otherwise it expands to nothing.  As with DPRINTK, the argument
 * is a parenthesized printf-style argument list.
 */
#ifdef DEBUG_GEOM
#define DPRINTF(a) printf a
#else
#define DPRINTF(a)
#endif
     92 
     93 
/*
 * Gate for the suspend sysctl: sysctl_xen_suspend() refuses with EAGAIN
 * while this is false, and xen_prepare_resume() resets it to false after a
 * resume.  Nothing in this file sets it to true; that is presumably done by
 * the code handling the dom0 suspend request (to be confirmed).
 */
bool xen_suspend_allow;
     95 
     96 void
     97 xen_parse_cmdline(int what, union xen_cmdline_parseinfo *xcp)
     98 {
     99 	char _cmd_line[256], *cmd_line, *opt, *s;
    100 	int b, i, ipidx = 0;
    101 	uint32_t xi_ip[5];
    102 	size_t len;
    103 
    104 	len = strlcpy(_cmd_line, xen_start_info.cmd_line, sizeof(_cmd_line));
    105 	if (len > sizeof(_cmd_line)) {
    106 		printf("command line exceeded limit of 255 chars. Truncated.\n");
    107 	}
    108 	cmd_line = _cmd_line;
    109 
    110 	switch (what) {
    111 	case XEN_PARSE_BOOTDEV:
    112 		xcp->xcp_bootdev[0] = 0;
    113 		break;
    114 	case XEN_PARSE_CONSOLE:
    115 		xcp->xcp_console[0] = 0;
    116 		break;
    117 	}
    118 
    119 	while (cmd_line && *cmd_line) {
    120 		opt = cmd_line;
    121 		cmd_line = strchr(opt, ' ');
    122 		if (cmd_line)
    123 			*cmd_line = 0;
    124 
    125 		switch (what) {
    126 		case XEN_PARSE_BOOTDEV:
    127 			if (strncasecmp(opt, "bootdev=", 8) == 0) {
    128 				strncpy(xcp->xcp_bootdev, opt + 8,
    129 				    sizeof(xcp->xcp_bootdev));
    130 				break;
    131 			}
    132 			if (strncasecmp(opt, "root=", 5) == 0) {
    133 				strncpy(xcp->xcp_bootdev, opt + 5,
    134 				    sizeof(xcp->xcp_bootdev));
    135 				break;
    136 			}
    137 			break;
    138 
    139 		case XEN_PARSE_NETINFO:
    140 			if (xcp->xcp_netinfo.xi_root &&
    141 			    strncasecmp(opt, "nfsroot=", 8) == 0)
    142 				strncpy(xcp->xcp_netinfo.xi_root, opt + 8,
    143 				    MNAMELEN);
    144 
    145 			if (strncasecmp(opt, "ip=", 3) == 0) {
    146 				memset(xi_ip, 0, sizeof(xi_ip));
    147 				opt += 3;
    148 				ipidx = 0;
    149 				while (opt && *opt) {
    150 					s = opt;
    151 					opt = strchr(opt, ':');
    152 					if (opt)
    153 						*opt = 0;
    154 
    155 					switch (ipidx) {
    156 					case 0:	/* ip */
    157 					case 1:	/* nfs server */
    158 					case 2:	/* gw */
    159 					case 3:	/* mask */
    160 					case 4:	/* host */
    161 						if (*s == 0)
    162 							break;
    163 						for (i = 0; i < 4; i++) {
    164 							b = strtoul(s, &s, 10);
    165 							xi_ip[ipidx] = b + 256
    166 								* xi_ip[ipidx];
    167 							if (*s != '.')
    168 								break;
    169 							s++;
    170 						}
    171 						if (i < 3)
    172 							xi_ip[ipidx] = 0;
    173 						break;
    174 					case 5:	/* interface */
    175 						if (!strncmp(s, "xennet", 6))
    176 							s += 6;
    177 						else if (!strncmp(s, "eth", 3))
    178 							s += 3;
    179 						else
    180 							break;
    181 						if (xcp->xcp_netinfo.xi_ifno
    182 						    == strtoul(s, NULL, 10))
    183 							memcpy(xcp->
    184 							    xcp_netinfo.xi_ip,
    185 							    xi_ip,
    186 							    sizeof(xi_ip));
    187 						break;
    188 					}
    189 					ipidx++;
    190 
    191 					if (opt)
    192 						*opt++ = ':';
    193 				}
    194 			}
    195 			break;
    196 
    197 		case XEN_PARSE_CONSOLE:
    198 			if (strncasecmp(opt, "console=", 8) == 0)
    199 				strncpy(xcp->xcp_console, opt + 8,
    200 				    sizeof(xcp->xcp_console));
    201 			break;
    202 
    203 		case XEN_PARSE_BOOTFLAGS:
    204 			if (*opt == '-') {
    205 				opt++;
    206 				while(*opt != '\0') {
    207 					BOOT_FLAG(*opt, boothowto);
    208 					opt++;
    209 				}
    210 			}
    211 			break;
    212 		case XEN_PARSE_PCIBACK:
    213 			if (strncasecmp(opt, "pciback.hide=", 13) == 0)
    214 				strncpy(xcp->xcp_pcidevs, opt + 13,
    215 				    sizeof(xcp->xcp_pcidevs));
    216 			break;
    217 		}
    218 
    219 		if (cmd_line)
    220 			*cmd_line++ = ' ';
    221 	}
    222 }
    223 
    224 #ifdef XENPV
    225 
/* Forward declarations for the domain suspend/resume machinery below. */
static int sysctl_xen_suspend(SYSCTLFN_ARGS);
static void xen_suspend_domain(void);
static void xen_prepare_suspend(void);
static void xen_prepare_resume(void);
    230 
    231 /*
    232  * this function sets up the machdep.xen.suspend sysctl(7) that
    233  * controls domain suspend/save.
    234  */
    235 void
    236 sysctl_xen_suspend_setup(void)
    237 {
    238 	const struct sysctlnode *node = NULL;
    239 
    240 	/*
    241 	 * dom0 implements sleep support through ACPI. It should not call
    242 	 * this function to register a suspend interface.
    243 	 */
    244 	KASSERT(!(xendomain_is_dom0()));
    245 
    246 	sysctl_createv(NULL, 0, NULL, &node,
    247 	    CTLFLAG_PERMANENT,
    248 	    CTLTYPE_NODE, "machdep", NULL,
    249 	    NULL, 0, NULL, 0,
    250 	    CTL_MACHDEP, CTL_EOL);
    251 
    252 	sysctl_createv(NULL, 0, &node, &node,
    253 	    CTLFLAG_PERMANENT,
    254 	    CTLTYPE_NODE, "xen",
    255 	    SYSCTL_DESCR("Xen top level node"),
    256 	    NULL, 0, NULL, 0,
    257 	    CTL_CREATE, CTL_EOL);
    258 
    259 	sysctl_createv(NULL, 0, &node, &node,
    260 	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE | CTLFLAG_IMMEDIATE,
    261 	    CTLTYPE_INT, "suspend",
    262 	    SYSCTL_DESCR("Suspend/save current Xen domain"),
    263 	    sysctl_xen_suspend, 0, NULL, 0,
    264 	    CTL_CREATE, CTL_EOL);
    265 }
    266 
    267 static int
    268 sysctl_xen_suspend(SYSCTLFN_ARGS)
    269 {
    270 	int error;
    271 	struct sysctlnode node;
    272 
    273 	node = *rnode;
    274 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
    275 
    276 	if (error || newp == NULL)
    277 		return error;
    278 
    279 	/* only allow domain to suspend when dom0 instructed to do so */
    280 	if (xen_suspend_allow == false)
    281 		return EAGAIN;
    282 
    283 	xen_suspend_domain();
    284 
    285 	return 0;
    286 
    287 }
    288 
/*
 * Cross-call handlers passed to xc_broadcast() by xen_prepare_suspend() and
 * xen_prepare_resume(); both arguments are unused.
 */
static void xen_suspendclocks_xc(void *, void*);
static void xen_resumeclocks_xc(void *, void*);
    291 
/*
 * Last operations before suspending domain
 *
 * Called by xen_suspend_domain() right before HYPERVISOR_suspend().
 * Preemption is disabled here and stays disabled on return; the matching
 * kpreempt_enable() is at the end of xen_prepare_resume().
 * Any failure to unmap the shared_info page crashes the domain.
 */
static void
xen_prepare_suspend(void)
{

	kpreempt_disable();

	pmap_xen_suspend();
	/* run xen_suspendclocks_xc() through a cross-call and wait for it */
	xc_wait(xc_broadcast(0, &xen_suspendclocks_xc, NULL, NULL));

	/*
	 * save/restore code does not translate these MFNs to their
	 * associated PFNs, so we must do it
	 */
	xen_start_info.store_mfn =
	    atop(xpmap_mtop(ptoa(xen_start_info.store_mfn)));
	xen_start_info.console_mfn =
	    atop(xpmap_mtop(ptoa(xen_start_info.console_mfn)));

	DPRINTK(("suspending domain\n"));
	aprint_verbose("suspending domain\n");

	/* invalidate the shared_info page */
	if (HYPERVISOR_update_va_mapping((vaddr_t)HYPERVISOR_shared_info,
	    0, UVMF_INVLPG)) {
		DPRINTK(("HYPERVISOR_shared_info page invalidation failed"));
		HYPERVISOR_crash();
	}

}
    324 
/*
 * Cross-call handler: suspend the clocks of the CPU it runs on.
 * Preemption is held off around the call so that curcpu() and the
 * operation refer to the same CPU.  Neither argument is used.
 */
static void
xen_suspendclocks_xc(void *a, void *b)
{
	(void)a;
	(void)b;

	kpreempt_disable();
	xen_suspendclocks(curcpu());
	kpreempt_enable();
}
    333 
/*
 * First operations before restoring domain context
 *
 * Called by xen_suspend_domain() right after HYPERVISOR_suspend() returns.
 * Undoes xen_prepare_suspend() in reverse order: remaps the shared_info
 * page, resumes the pmap, resumes the clocks and finally re-enables the
 * preemption that xen_prepare_suspend() disabled.  It also clears
 * xen_suspend_allow, so another suspend needs a fresh authorization.
 * Unrecoverable conditions crash the domain.
 */
static void
xen_prepare_resume(void)
{
	/* map the new shared_info page */
	if (HYPERVISOR_update_va_mapping((vaddr_t)HYPERVISOR_shared_info,
	    xen_start_info.shared_info | PTE_W | PTE_P,
	    UVMF_INVLPG)) {
		DPRINTK(("could not map new shared info page"));
		HYPERVISOR_crash();
	}

	pmap_xen_resume();

	/* the domain must come back with the page count it was booted with */
	if (xen_start_info.nr_pages != physmem) {
		/*
		 * XXX JYM for now, we crash - fix it with memory
		 * hotplug when supported
		 */
		DPRINTK(("xen_start_info.nr_pages != physmem"));
		HYPERVISOR_crash();
	}

	DPRINTK(("preparing domain resume\n"));
	aprint_verbose("preparing domain resume\n");

	xen_suspend_allow = false;

	/* run xen_resumeclocks_xc() through a cross-call and wait for it */
	xc_wait(xc_broadcast(0, xen_resumeclocks_xc, NULL, NULL));

	/* pairs with kpreempt_disable() in xen_prepare_suspend() */
	kpreempt_enable();

}
    369 
/*
 * Cross-call handler: resume the clocks of the CPU it runs on.
 * Preemption is held off around the call so that curcpu() and the
 * operation refer to the same CPU.  Neither argument is used.
 */
static void
xen_resumeclocks_xc(void *a, void *b)
{
	(void)a;
	(void)b;

	kpreempt_disable();
	xen_resumeclocks(curcpu());
	kpreempt_enable();
}
    378 
/*
 * Suspend the domain and bring it back up after the hypervisor resumes it.
 *
 * Sequence: suspend devices, look up the machine frame of xen_start_info,
 * run xen_prepare_suspend(), call HYPERVISOR_suspend(), then on return run
 * xen_prepare_resume() and resume devices.  The whole sequence runs at
 * splvm().  Every failure along the way crashes the domain via
 * HYPERVISOR_crash(); there is no error return.
 */
static void
xen_suspend_domain(void)
{
	paddr_t mfn;
	int s = splvm(); /* XXXSMP */

	/*
	 * console becomes unavailable when suspended, so
	 * direct communications to domain are hampered from there on.
	 * We can only rely on low level primitives like printk(), until
	 * console is fully restored
	 */
	if (!pmf_system_suspend(PMF_Q_NONE)) {
		DPRINTK(("devices suspend failed"));
		HYPERVISOR_crash();
	}

	/*
	 * obtain the MFN of the start_info page now, as we will not be
	 * able to do it once pmap is locked
	 */
	pmap_extract_ma(pmap_kernel(), (vaddr_t)&xen_start_info, &mfn);
	/* machine address -> machine frame number */
	mfn >>= PAGE_SHIFT;

	xen_prepare_suspend();

	DPRINTK(("calling HYPERVISOR_suspend()\n"));
	/* any non-zero return is treated as fatal for now */
	if (HYPERVISOR_suspend(mfn) != 0) {
	/* XXX JYM: implement checkpoint/snapshot (ret == 1) */
		DPRINTK(("HYPERVISOR_suspend() failed"));
		HYPERVISOR_crash();
	}

	DPRINTK(("left HYPERVISOR_suspend()\n"));

	xen_prepare_resume();

	DPRINTK(("resuming devices\n"));
	if (!pmf_system_resume(PMF_Q_NONE)) {
		DPRINTK(("devices resume failed\n"));
		HYPERVISOR_crash();
	}

	splx(s);

	/* xencons is back online, we can print to console */
	aprint_verbose("domain resumed\n");

}
    428 #endif /* XENPV */
    429 
    430 #define PRINTK_BUFSIZE 1024
    431 void
    432 printk(const char *fmt, ...)
    433 {
    434 	va_list ap;
    435 	int ret;
    436 	static char buf[PRINTK_BUFSIZE];
    437 
    438 	va_start(ap, fmt);
    439 	ret = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap);
    440 	va_end(ap);
    441 	buf[ret] = 0;
    442 	(void)HYPERVISOR_console_io(CONSOLEIO_write, ret, buf);
    443 }
    444 
/* Handlers of the fallback console installed by xen_early_console(). */
static int early_xenconscn_getc(dev_t);
static void early_xenconscn_putc(dev_t, int);
static void early_xenconscn_pollc(dev_t, int);

/*
 * Console descriptor for the fallback console.  Only the getc, putc and
 * pollc handlers are provided; every other hook is NULL and the console is
 * not bound to a device (NODEV).
 * NOTE(review): the initializer is positional and relies on the member
 * order of struct consdev, which is declared elsewhere - confirm if that
 * structure changes.
 */
static struct consdev early_xencons = {
	NULL, NULL,
	early_xenconscn_getc, early_xenconscn_putc, early_xenconscn_pollc,
	NULL, NULL, NULL, NODEV, CN_NORMAL
};
    454 
/*
 * Make early_xencons the current console by pointing cn_tab at it.
 * Output then goes through early_xenconscn_putc(), i.e. printk().
 */
void
xen_early_console(void)
{
	cn_tab = &early_xencons; /* fallback console */
}
    460 
    461 static int
    462 early_xenconscn_getc(dev_t dev)
    463 {
    464 	while(1)
    465 		;
    466 	return -1;
    467 }
    468 
/*
 * Output handler of the fallback console: emits the single character `c'
 * through printk().  `dev' is unused.
 */
static void
early_xenconscn_putc(dev_t dev, int c)
{
	printk("%c", c);
}
    474 
/*
 * Polling-mode handler of the fallback console: intentionally does
 * nothing.  Both arguments are unused.
 */
static void
early_xenconscn_pollc(dev_t dev, int on)
{
	return;
}
/*
 * One flag per hypervisor feature bit, filled in by xen_init_features().
 * The entry for bit f of submap sm lives at index sm * 32 + f.
 */
bool xen_feature_tables[XENFEAT_NR_SUBMAPS * 32];
    481 
    482 void
    483 xen_init_features(void)
    484 {
    485 	xen_feature_info_t features;
    486 
    487 	for (int sm = 0; sm < XENFEAT_NR_SUBMAPS; sm++) {
    488 		features.submap_idx = sm;
    489 		if (HYPERVISOR_xen_version(XENVER_get_features, &features) < 0)
    490 			break;
    491 		for (int f = 0; f < 32; f++) {
    492 			xen_feature_tables[sm * 32 + f] =
    493 			    (features.submap & (1 << f)) ? 1 : 0;
    494 		}
    495 	}
    496 }
    497 
    498 /*
    499  * Attempt to find the device from which we were booted.
    500  */
    501 
    502 static int
    503 is_valid_disk(device_t dv)
    504 {
    505 	if (device_class(dv) != DV_DISK)
    506 		return (0);
    507 
    508 	return (device_is_a(dv, "dk") ||
    509 		device_is_a(dv, "sd") ||
    510 		device_is_a(dv, "wd") ||
    511 		device_is_a(dv, "ld") ||
    512 		device_is_a(dv, "ed") ||
    513 		device_is_a(dv, "xbd"));
    514 }
    515 
    516 void
    517 xen_bootconf(void)
    518 {
    519 	device_t dv;
    520 	deviter_t di;
    521 	union xen_cmdline_parseinfo xcp;
    522 	static char bootspecbuf[sizeof(xcp.xcp_bootdev)];
    523 
    524 	if (booted_device) {
    525 		DPRINTF(("%s: preset booted_device: %s\n", __func__, device_xname(booted_device)));
    526 		return;
    527 	}
    528 
    529 	xen_parse_cmdline(XEN_PARSE_BOOTDEV, &xcp);
    530 
    531 	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST);
    532 	     dv != NULL;
    533 	     dv = deviter_next(&di)) {
    534 		bool is_ifnet, is_disk;
    535 		const char *devname;
    536 
    537 		is_ifnet = (device_class(dv) == DV_IFNET);
    538 		is_disk = is_valid_disk(dv);
    539 		devname = device_xname(dv);
    540 
    541 		if (!is_ifnet && !is_disk)
    542 			continue;
    543 
    544 		if (is_disk && xcp.xcp_bootdev[0] == 0) {
    545 			booted_device = dv;
    546 			break;
    547 		}
    548 
    549 		if (strncmp(xcp.xcp_bootdev, devname, strlen(devname)))
    550 			continue;
    551 
    552 		if (is_disk && strlen(xcp.xcp_bootdev) > strlen(devname)) {
    553 			/* XXX check device_cfdata as in x86_autoconf.c? */
    554 			booted_partition = toupper(
    555 				xcp.xcp_bootdev[strlen(devname)]) - 'A';
    556 			DPRINTF(("%s: booted_partition: %d\n", __func__, booted_partition));
    557 		}
    558 
    559 		booted_device = dv;
    560 		booted_method = "bootinfo/bootdev";
    561 		break;
    562 	}
    563 	deviter_release(&di);
    564 
    565 	if (booted_device) {
    566 		DPRINTF(("%s: booted_device: %s\n", __func__, device_xname(booted_device)));
    567 		return;
    568 	}
    569 
    570 	/*
    571 	 * not a boot device name, pass through to MI code
    572 	 */
    573 	if (xcp.xcp_bootdev[0] != '\0') {
    574 		strlcpy(bootspecbuf, xcp.xcp_bootdev, sizeof(bootspecbuf));
    575 		bootspec = bootspecbuf;
    576 		booted_method = "bootinfo/bootspec";
    577 		DPRINTF(("%s: bootspec: %s\n", __func__, bootspec));
    578 		return;
    579 	}
    580 }
    581