vmm: Deduplicate VM and vCPU state management code

Now that the machine-independent fields of struct vm and struct vcpu are
available in a header, we can move lots of duplicated code into
sys/dev/vmm/vmm_vm.c.  This change does exactly that.

No functional change intended.

MFC after:	2 months
Sponsored by:	The FreeBSD Foundation
Sponsored by:	Klara, Inc.
Differential Revision:	https://reviews.freebsd.org/D53585
This commit is contained in:
Mark Johnston
2026-01-08 21:54:06 +00:00
parent a6411f6b7d
commit ed85203fb7
18 changed files with 551 additions and 1227 deletions
-37
View File
@@ -233,19 +233,7 @@ struct vmm_ops {
extern const struct vmm_ops vmm_ops_intel;
extern const struct vmm_ops vmm_ops_amd;
int vm_create(const char *name, struct vm **retvm);
struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid);
void vm_disable_vcpu_creation(struct vm *vm);
void vm_lock_vcpus(struct vm *vm);
void vm_unlock_vcpus(struct vm *vm);
void vm_destroy(struct vm *vm);
int vm_reinit(struct vm *vm);
const char *vm_name(struct vm *vm);
uint16_t vm_get_maxcpus(struct vm *vm);
void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
uint16_t *threads, uint16_t *maxcpus);
int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
uint16_t threads, uint16_t maxcpus);
int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
@@ -259,7 +247,6 @@ int vm_get_seg_desc(struct vcpu *vcpu, int reg,
int vm_set_seg_desc(struct vcpu *vcpu, int reg,
struct seg_desc *desc);
int vm_run(struct vcpu *vcpu);
int vm_suspend(struct vm *vm, enum vm_suspend_how how);
int vm_inject_nmi(struct vcpu *vcpu);
int vm_nmi_pending(struct vcpu *vcpu);
void vm_nmi_clear(struct vcpu *vcpu);
@@ -277,9 +264,6 @@ int vm_set_capability(struct vcpu *vcpu, int type, int val);
int vm_get_x2apic_state(struct vcpu *vcpu, enum x2apic_state *state);
int vm_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state);
int vm_apicid2vcpuid(struct vm *vm, int apicid);
int vm_activate_cpu(struct vcpu *vcpu);
int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu);
int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu);
int vm_restart_instruction(struct vcpu *vcpu);
struct vm_exit *vm_exitinfo(struct vcpu *vcpu);
cpuset_t *vm_exitinfo_cpuset(struct vcpu *vcpu);
@@ -292,24 +276,6 @@ int vm_snapshot_req(struct vm *vm, struct vm_snapshot_meta *meta);
int vm_restore_time(struct vm *vm);
#ifdef _SYS__CPUSET_H_
/*
* Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
* The rendezvous 'func(arg)' is not allowed to do anything that will
* cause the thread to be put to sleep.
*
* The caller cannot hold any locks when initiating the rendezvous.
*
* The implementation of this API may cause vcpus other than those specified
* by 'dest' to be stalled. The caller should not rely on any vcpus making
* forward progress when the rendezvous is in progress.
*/
typedef void (*vm_rendezvous_func_t)(struct vcpu *vcpu, void *arg);
int vm_smp_rendezvous(struct vcpu *vcpu, cpuset_t dest,
vm_rendezvous_func_t func, void *arg);
cpuset_t vm_active_cpus(struct vm *vm);
cpuset_t vm_debug_cpus(struct vm *vm);
cpuset_t vm_suspended_cpus(struct vm *vm);
cpuset_t vm_start_cpus(struct vm *vm, const cpuset_t *tostart);
void vm_await_start(struct vm *vm, const cpuset_t *waiting);
#endif /* _SYS__CPUSET_H_ */
@@ -341,8 +307,6 @@ vcpu_reqidle(struct vm_eventinfo *info)
return (*info->iptr);
}
int vcpu_debugged(struct vcpu *vcpu);
/*
* Return true if device indicated by bus/slot/func is supposed to be a
* pci passthrough device.
@@ -354,7 +318,6 @@ bool vmm_is_pptdev(int bus, int slot, int func);
void *vm_iommu_domain(struct vm *vm);
void *vcpu_stats(struct vcpu *vcpu);
void vcpu_notify_event(struct vcpu *vcpu);
void vcpu_notify_lapic(struct vcpu *vcpu);
struct vm_mem *vm_mem(struct vm *vm);
struct vatpic *vm_atpic(struct vm *vm);
+1
View File
@@ -43,6 +43,7 @@
#include <machine/vmm_snapshot.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_vm.h>
#include "vmm_lapic.h"
#include "vlapic.h"
+1
View File
@@ -47,6 +47,7 @@
#include <machine/vmm_snapshot.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_vm.h>
#include "vmm_lapic.h"
#include "vmm_stat.h"
+5 -478
View File
@@ -161,8 +161,7 @@ static MALLOC_DEFINE(M_VM, "vm", "vm");
/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
NULL);
SYSCTL_DECL(_hw_vmm);
/*
* Halt the guest if all vcpus are executing a HLT instruction with
@@ -173,10 +172,6 @@ SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN,
&halt_detection_enabled, 0,
"Halt VM if all vcpus execute HLT with interrupts disabled");
static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
"IPI vector used for vcpu notifications");
static int trace_guest_exceptions;
SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN,
&trace_guest_exceptions, 0,
@@ -186,8 +181,6 @@ static int trap_wbinvd;
SYSCTL_INT(_hw_vmm, OID_AUTO, trap_wbinvd, CTLFLAG_RDTUN, &trap_wbinvd, 0,
"WBINVD triggers a VM-exit");
static void vcpu_notify_event_locked(struct vcpu *vcpu);
/* global statistics */
VMM_STAT(VCPU_MIGRATIONS, "vcpu migration across host cpus");
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
@@ -284,7 +277,6 @@ vcpu_init(struct vcpu *vcpu)
/*
 * Return the current value of the file-scope 'trace_guest_exceptions'
 * setting.  The 'vcpu' argument is not consulted.
 */
int
vcpu_trace_exceptions(struct vcpu *vcpu)
{
	const int tracing = trace_guest_exceptions;

	return (tracing);
}
@@ -364,14 +356,6 @@ vm_init(struct vm *vm, bool create)
}
}
/*
 * Mark the VM as dying.  The flag is flipped while holding the vCPU
 * initialization lock exclusively.
 */
void
vm_disable_vcpu_creation(struct vm *vm)
{
	struct sx *init_lock = &vm->vcpus_init_lock;

	sx_xlock(init_lock);
	vm->dying = true;
	sx_xunlock(init_lock);
}
struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
@@ -402,18 +386,6 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid)
return (vcpu);
}
/*
 * Acquire the VM's vCPU initialization lock exclusively.
 */
void
vm_lock_vcpus(struct vm *vm)
{
	struct sx *init_lock = &vm->vcpus_init_lock;

	sx_xlock(init_lock);
}
/*
 * Release the VM's vCPU initialization lock.
 */
void
vm_unlock_vcpus(struct vm *vm)
{
	struct sx *init_lock = &vm->vcpus_init_lock;

	sx_unlock(init_lock);
}
int
vm_create(const char *name, struct vm **retvm)
{
@@ -443,35 +415,6 @@ vm_create(const char *name, struct vm **retvm)
return (0);
}
/*
 * Copy the VM's CPU topology (sockets, cores, threads, maxcpus) into the
 * caller-supplied locations.  All four output pointers are dereferenced
 * unconditionally.
 */
void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*maxcpus = vm->maxcpus;
	*threads = vm->threads;
	*cores = vm->cores;
	*sockets = vm->sockets;
}
/*
 * Return the VM's 'maxcpus' field.
 */
uint16_t
vm_get_maxcpus(struct vm *vm)
{
	const uint16_t limit = vm->maxcpus;

	return (limit);
}
/*
 * Set the VM's CPU topology.
 *
 * The requested sockets * cores * threads must not exceed the VM's
 * 'maxcpus'; otherwise EINVAL is returned and nothing is modified.  The
 * 'maxcpus' argument is ignored.  Returns 0 on success.
 */
int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus __unused)
{
	uint64_t ncpus;

	/*
	 * Compute the product in 64 bits: the uint16_t operands would
	 * otherwise be promoted to int, and 65535^3 overflows it, which is
	 * undefined behaviour and could let the bounds check pass.
	 */
	ncpus = (uint64_t)sockets * cores * threads;

	/* Ignore maxcpus. */
	if (ncpus > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}
static void
vm_cleanup(struct vm *vm, bool destroy)
{
@@ -520,23 +463,11 @@ vm_destroy(struct vm *vm)
free(vm, M_VM);
}
int
vm_reinit(struct vm *vm)
void
vm_reset(struct vm *vm)
{
int error;
/*
* A virtual machine can be reset only if all vcpus are suspended.
*/
if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
vm_cleanup(vm, false);
vm_init(vm, false);
error = 0;
} else {
error = EBUSY;
}
return (error);
vm_cleanup(vm, false);
vm_init(vm, false);
}
const char *
@@ -810,210 +741,6 @@ save_guest_fpustate(struct vcpu *vcpu)
static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
/*
 * Run the pending rendezvous callback on behalf of 'vcpu' if it was
 * requested for this vcpu and has not yet been run for it.  The rendezvous
 * mutex must be held.
 *
 * Returns true once every requested vcpu has completed the rendezvous (in
 * which case the rendezvous is torn down and sleepers are woken), false
 * otherwise.
 */
static bool
vm_rendezvous(struct vcpu *vcpu)
{
	struct vm *vm;
	int id;

	vm = vcpu->vm;

	mtx_assert(&vm->rendezvous_mtx, MA_OWNED);
	KASSERT(vm->rendezvous_func != NULL,
	    ("vm_rendezvous: no rendezvous pending"));

	/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
	CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus,
	    &vm->active_cpus);

	id = vcpu->vcpuid;
	if (CPU_ISSET(id, &vm->rendezvous_req_cpus) &&
	    !CPU_ISSET(id, &vm->rendezvous_done_cpus)) {
		VMM_CTR0(vcpu, "Calling rendezvous func");
		(*vm->rendezvous_func)(vcpu, vm->rendezvous_arg);
		CPU_SET(id, &vm->rendezvous_done_cpus);
	}

	/* Still waiting on at least one other vcpu. */
	if (CPU_CMP(&vm->rendezvous_req_cpus,
	    &vm->rendezvous_done_cpus) != 0)
		return (false);

	VMM_CTR0(vcpu, "Rendezvous completed");
	CPU_ZERO(&vm->rendezvous_req_cpus);
	vm->rendezvous_func = NULL;
	wakeup(&vm->rendezvous_func);
	return (true);
}
/*
 * Ask a non-idle vcpu to become idle and sleep on its state for up to
 * 'hz' ticks.  Called with the vcpu lock held; msleep_spin() is handed
 * that lock.
 */
static void
vcpu_wait_idle(struct vcpu *vcpu)
{
	KASSERT(vcpu->state != VCPU_IDLE, ("vcpu already idle"));

	/* Flag the request, then notify the vcpu of the pending event. */
	vcpu->reqidle = 1;
	vcpu_notify_event_locked(vcpu);

	VMM_CTR1(vcpu, "vcpu state change from %s to "
	    "idle requested", vcpu_state2str(vcpu->state));
	msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
}
/*
 * Transition 'vcpu' to 'newstate' with the vcpu lock held.
 *
 * When 'from_idle' is true the function first waits for the vcpu to reach
 * VCPU_IDLE; otherwise the vcpu must not be idle.  Returns 0 on success or
 * EBUSY if the requested transition is not permitted.
 */
static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	bool allowed;

	vcpu_assert_locked(vcpu);

	if (!from_idle) {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	} else {
		/*
		 * State transitions from the vmmdev_ioctl() must always
		 * begin from the VCPU_IDLE state.  This guarantees that
		 * there is only a single ioctl() operating on a vcpu at any
		 * point.
		 */
		while (vcpu->state != VCPU_IDLE)
			vcpu_wait_idle(vcpu);
	}

	if (vcpu->state != VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_FROZEN:
		allowed = (newstate != VCPU_FROZEN);
		break;
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		allowed = (newstate == VCPU_FROZEN);
		break;
	default:
		allowed = false;
		break;
	}
	if (!allowed)
		return (EBUSY);

	VMM_CTR2(vcpu, "vcpu state changed from %s to %s",
	    vcpu_state2str(vcpu->state), vcpu_state2str(newstate));

	vcpu->state = newstate;
	vcpu->hostcpu = (newstate == VCPU_RUNNING) ? curcpu : NOCPU;

	/* Wake anyone sleeping on the state in vcpu_wait_idle(). */
	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}
/*
 * Try to lock all of the vCPUs in the VM while taking care to avoid deadlocks
 * with vm_smp_rendezvous().
 *
 * Every existing vCPU is moved from VCPU_IDLE to 'newstate' (which must not
 * be VCPU_IDLE).  Returns 0 on success.  On failure the error from
 * vcpu_set_state_locked() is returned and every vCPU that had already been
 * transitioned is moved back to VCPU_IDLE.
 *
 * The complexity here suggests that the rendezvous mechanism needs a rethink.
 */
int
vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate)
{
	cpuset_t locked;
	struct vcpu *vcpu;
	int error, i, j;
	uint16_t maxcpus;

	KASSERT(newstate != VCPU_IDLE,
	    ("vcpu_set_state_all: invalid target state %d", newstate));

	error = 0;
	CPU_ZERO(&locked);
	maxcpus = vm->maxcpus;

	mtx_lock(&vm->rendezvous_mtx);
restart:
	if (vm->rendezvous_func != NULL) {
		/*
		 * If we have a pending rendezvous, then the initiator may be
		 * blocked waiting for other vCPUs to execute the callback. The
		 * current thread may be a vCPU thread so we must not block
		 * waiting for the initiator, otherwise we get a deadlock.
		 * Thus, execute the callback on behalf of any idle vCPUs.
		 */
		for (i = 0; i < maxcpus; i++) {
			vcpu = vm_vcpu(vm, i);
			if (vcpu == NULL)
				continue;
			vcpu_lock(vcpu);
			if (vcpu->state == VCPU_IDLE) {
				(void)vcpu_set_state_locked(vcpu, VCPU_FROZEN,
				    true);
				CPU_SET(i, &locked);
			}
			if (CPU_ISSET(i, &locked)) {
				/*
				 * We can safely execute the callback on this
				 * vCPU's behalf.
				 */
				vcpu_unlock(vcpu);
				(void)vm_rendezvous(vcpu);
				vcpu_lock(vcpu);
			}
			vcpu_unlock(vcpu);
		}
	}

	/*
	 * Now wait for remaining vCPUs to become idle. This may include the
	 * initiator of a rendezvous that is currently blocked on the rendezvous
	 * mutex.
	 */
	CPU_FOREACH_ISCLR(i, &locked) {
		if (i >= maxcpus)
			break;
		vcpu = vm_vcpu(vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_lock(vcpu);
		while (vcpu->state != VCPU_IDLE) {
			mtx_unlock(&vm->rendezvous_mtx);
			vcpu_wait_idle(vcpu);
			vcpu_unlock(vcpu);
			mtx_lock(&vm->rendezvous_mtx);
			if (vm->rendezvous_func != NULL)
				goto restart;
			vcpu_lock(vcpu);
		}
		error = vcpu_set_state_locked(vcpu, newstate, true);
		vcpu_unlock(vcpu);
		if (error != 0) {
			/*
			 * Roll back state changes.  Each vCPU recorded in
			 * 'locked' must be looked up by its own index; the
			 * vCPU that just failed was never transitioned and
			 * is not in the set.
			 */
			CPU_FOREACH_ISSET(j, &locked)
				(void)vcpu_set_state(vm_vcpu(vm, j),
				    VCPU_IDLE, false);
			break;
		}
		CPU_SET(i, &locked);
	}
	mtx_unlock(&vm->rendezvous_mtx);
	return (error);
}
static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
@@ -1032,37 +759,6 @@ vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
panic("Error %d setting state to %d", error, newstate);
}
/*
 * Participate in a pending rendezvous until it completes.
 *
 * Loops while a rendezvous is pending: runs the callback via
 * vm_rendezvous() and otherwise sleeps on the rendezvous for up to 'hz'
 * ticks.  If the thread has a pending suspension AST, the rendezvous mutex
 * is dropped and thread_check_susp() is called; a non-zero result is
 * returned to the caller with the mutex released.  Returns 0 otherwise.
 */
static int
vm_handle_rendezvous(struct vcpu *vcpu)
{
	struct thread *td;
	struct vm *vm;
	int error;

	vm = vcpu->vm;
	td = curthread;

	mtx_lock(&vm->rendezvous_mtx);
	while (vm->rendezvous_func != NULL && !vm_rendezvous(vcpu)) {
		VMM_CTR0(vcpu, "Wait for rendezvous completion");
		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
		    "vmrndv", hz);
		if (!td_ast_pending(td, TDA_SUSPEND))
			continue;

		mtx_unlock(&vm->rendezvous_mtx);
		error = thread_check_susp(td, true);
		if (error != 0)
			return (error);
		mtx_lock(&vm->rendezvous_mtx);
	}
	mtx_unlock(&vm->rendezvous_mtx);
	return (0);
}
/*
* Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
*/
@@ -1386,33 +1082,6 @@ vm_handle_db(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
return (0);
}
/*
 * Request suspension of the whole VM for reason 'how'.
 *
 * Returns EINVAL if 'how' is outside (VM_SUSPEND_NONE, VM_SUSPEND_LAST),
 * EALREADY if vm->suspend was already non-zero, and 0 after recording the
 * reason and notifying every active vcpu.
 */
int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int cpu;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	/* Only the first requester gets to set the suspend reason. */
	if (!atomic_cmpset_int(&vm->suspend, 0, how)) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (cpu = 0; cpu < vm->maxcpus; cpu++) {
		if (!CPU_ISSET(cpu, &vm->active_cpus))
			continue;
		vcpu_notify_event(vm_vcpu(vm, cpu));
	}

	return (0);
}
void
vm_exit_suspended(struct vcpu *vcpu, uint64_t rip)
{
@@ -2039,107 +1708,6 @@ vm_iommu_domain(struct vm *vm)
return (vm->iommu);
}
/*
 * Locking wrapper around vcpu_set_state_locked(): takes the vcpu lock,
 * performs the transition and returns its result.
 */
int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int rv;

	vcpu_lock(vcpu);
	rv = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (rv);
}
enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
enum vcpu_state state;
vcpu_lock(vcpu);
state = vcpu->state;
if (hostcpu != NULL)
*hostcpu = vcpu->hostcpu;
vcpu_unlock(vcpu);
return (state);
}
/*
 * Add 'vcpu' to its VM's set of active CPUs.  Returns EBUSY if it is
 * already a member, 0 otherwise.
 */
int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *owner;
	int id;

	owner = vcpu->vm;
	id = vcpu->vcpuid;

	if (CPU_ISSET(id, &owner->active_cpus))
		return (EBUSY);

	VMM_CTR0(vcpu, "activated");
	CPU_SET_ATOMIC(id, &owner->active_cpus);
	return (0);
}
/*
 * Add vcpus to the VM's 'debug_cpus' set and notify them.
 *
 * With a NULL 'vcpu', the debug set becomes a copy of the active set and
 * every active vcpu is notified.  Otherwise only 'vcpu' is added; EINVAL is
 * returned if it is not active.  Returns 0 on success.
 */
int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	int cpu;

	if (vcpu != NULL) {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
		return (0);
	}

	vm->debug_cpus = vm->active_cpus;
	for (cpu = 0; cpu < vm->maxcpus; cpu++) {
		if (!CPU_ISSET(cpu, &vm->active_cpus))
			continue;
		vcpu_notify_event(vm_vcpu(vm, cpu));
	}
	return (0);
}
/*
 * Remove vcpus from the VM's 'debug_cpus' set.
 *
 * With a NULL 'vcpu' the whole set is cleared.  Otherwise only 'vcpu' is
 * removed; EINVAL is returned if it was not in the set.  Returns 0 on
 * success.
 */
int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
		return (0);
	}

	if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
		return (EINVAL);

	CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	return (0);
}
int
vcpu_debugged(struct vcpu *vcpu)
{
return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}
/*
 * Return a by-value copy of the VM's 'active_cpus' set.
 */
cpuset_t
vm_active_cpus(struct vm *vm)
{
	cpuset_t snapshot = vm->active_cpus;

	return (snapshot);
}
/*
 * Return a by-value copy of the VM's 'debug_cpus' set.
 */
cpuset_t
vm_debug_cpus(struct vm *vm)
{
	cpuset_t snapshot = vm->debug_cpus;

	return (snapshot);
}
/*
 * Return a by-value copy of the VM's 'suspended_cpus' set.
 */
cpuset_t
vm_suspended_cpus(struct vm *vm)
{
	cpuset_t snapshot = vm->suspended_cpus;

	return (snapshot);
}
/*
* Returns the subset of vCPUs in tostart that are awaiting startup.
* These vCPUs are also marked as no longer awaiting startup.
@@ -2192,47 +1760,6 @@ vm_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state)
return (0);
}
/*
 * Make a vcpu notice a pending event as soon as possible.  The caller
 * holds the vcpu lock.
 * - A sleeping vcpu thread is woken up.
 * - A vcpu running on another host cpu is sent an IPI so that it traps
 *   into the hypervisor.
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;

	if (vcpu->state != VCPU_RUNNING) {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
		return;
	}

	KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));

	/*
	 * If the 'vcpu' is running on 'curcpu' then it must be sending a
	 * notification to itself (e.g. SELF_IPI).  The pending event will
	 * be picked up when the vcpu transitions back to guest context, so
	 * an IPI is only needed for a remote host cpu.
	 */
	if (hostcpu != curcpu)
		ipi_cpu(hostcpu, vmm_ipinum);
}
/*
 * Locking wrapper: notify 'vcpu' of a pending event while holding its
 * lock.
 */
void
vcpu_notify_event(struct vcpu *vcpu)
{
	struct vcpu *target = vcpu;

	vcpu_lock(target);
	vcpu_notify_event_locked(target);
	vcpu_unlock(target);
}
void
vcpu_notify_lapic(struct vcpu *vcpu)
{
+1
View File
@@ -34,6 +34,7 @@
#include <x86/apicreg.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_vm.h>
#include <machine/vmm.h>
#include "vmm_lapic.h"
+1
View File
@@ -39,6 +39,7 @@
#include <machine/vmm.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_vm.h>
#include "vmm_host.h"
#include "vmm_util.h"
-26
View File
@@ -181,24 +181,11 @@ DECLARE_VMMOPS_FUNC(int, restore_tsc, (void *vcpui, uint64_t now));
#endif
#endif
int vm_create(const char *name, struct vm **retvm);
struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid);
void vm_disable_vcpu_creation(struct vm *vm);
void vm_lock_vcpus(struct vm *vm);
void vm_unlock_vcpus(struct vm *vm);
void vm_destroy(struct vm *vm);
int vm_reinit(struct vm *vm);
const char *vm_name(struct vm *vm);
uint16_t vm_get_maxcpus(struct vm *vm);
void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
uint16_t *threads, uint16_t *maxcpus);
int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
uint16_t threads, uint16_t maxcpus);
int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval);
int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val);
int vm_run(struct vcpu *vcpu);
int vm_suspend(struct vm *vm, enum vm_suspend_how how);
void* vm_get_cookie(struct vm *vm);
int vcpu_vcpuid(struct vcpu *vcpu);
void *vcpu_get_cookie(struct vcpu *vcpu);
@@ -206,9 +193,6 @@ struct vm *vcpu_vm(struct vcpu *vcpu);
struct vcpu *vm_vcpu(struct vm *vm, int cpu);
int vm_get_capability(struct vcpu *vcpu, int type, int *val);
int vm_set_capability(struct vcpu *vcpu, int type, int val);
int vm_activate_cpu(struct vcpu *vcpu);
int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu);
int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu);
int vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far);
int vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr);
int vm_assert_irq(struct vm *vm, uint32_t irq);
@@ -218,13 +202,8 @@ int vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
struct vm_exit *vm_exitinfo(struct vcpu *vcpu);
void vm_exit_suspended(struct vcpu *vcpu, uint64_t pc);
void vm_exit_debug(struct vcpu *vcpu, uint64_t pc);
void vm_exit_rendezvous(struct vcpu *vcpu, uint64_t pc);
void vm_exit_astpending(struct vcpu *vcpu, uint64_t pc);
cpuset_t vm_active_cpus(struct vm *vm);
cpuset_t vm_debug_cpus(struct vm *vm);
cpuset_t vm_suspended_cpus(struct vm *vm);
static __inline int
vcpu_rendezvous_pending(struct vm_eventinfo *info)
{
@@ -239,14 +218,9 @@ vcpu_suspended(struct vm_eventinfo *info)
return (*info->sptr);
}
int vcpu_debugged(struct vcpu *vcpu);
void *vcpu_stats(struct vcpu *vcpu);
void vcpu_notify_event(struct vcpu *vcpu);
struct vm_mem *vm_mem(struct vm *vm);
enum vm_reg_name vm_segment_name(int seg_encoding);
struct vm_copyinfo {
uint64_t gpa;
size_t len;
+1
View File
@@ -69,6 +69,7 @@
#include <arm64/vmm/vmm_handlers.h>
#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_vm.h>
#include "vgic.h"
#include "vgic_v3.h"
+5 -312
View File
@@ -40,7 +40,6 @@
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
@@ -74,20 +73,11 @@
#include "io/vgic.h"
#include "io/vtimer.h"
static int vm_handle_wfi(struct vcpu *vcpu,
struct vm_exit *vme, bool *retu);
static MALLOC_DEFINE(M_VMM, "vmm", "vmm");
/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
"IPI vector used for vcpu notifications");
struct vmm_regs {
uint64_t id_aa64afr0;
uint64_t id_aa64afr1;
@@ -142,8 +132,6 @@ static const struct vmm_regs vmm_arch_regs_masks = {
/* Host registers masked by vmm_arch_regs_masks. */
static struct vmm_regs vmm_arch_regs;
static void vcpu_notify_event_locked(struct vcpu *vcpu);
/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
@@ -294,14 +282,6 @@ vm_init(struct vm *vm, bool create)
}
}
/*
 * Mark the VM as dying.  The flag is flipped while holding the vCPU
 * initialization lock exclusively.
 */
void
vm_disable_vcpu_creation(struct vm *vm)
{
	struct sx *init_lock = &vm->vcpus_init_lock;

	sx_xlock(init_lock);
	vm->dying = true;
	sx_xunlock(init_lock);
}
struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
@@ -338,18 +318,6 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid)
return (vcpu);
}
/*
 * Acquire the VM's vCPU initialization lock exclusively.
 */
void
vm_lock_vcpus(struct vm *vm)
{
	struct sx *init_lock = &vm->vcpus_init_lock;

	sx_xlock(init_lock);
}
/*
 * Release the VM's vCPU initialization lock.
 */
void
vm_unlock_vcpus(struct vm *vm)
{
	struct sx *init_lock = &vm->vcpus_init_lock;

	sx_unlock(init_lock);
}
int
vm_create(const char *name, struct vm **retvm)
{
@@ -363,6 +331,7 @@ vm_create(const char *name, struct vm **retvm)
return (error);
}
strcpy(vm->name, name);
mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
sx_init(&vm->vcpus_init_lock, "vm vcpus");
vm->sockets = 1;
@@ -379,35 +348,6 @@ vm_create(const char *name, struct vm **retvm)
return (0);
}
/*
 * Copy the VM's CPU topology (sockets, cores, threads, maxcpus) into the
 * caller-supplied locations.  All four output pointers are dereferenced
 * unconditionally.
 */
void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*maxcpus = vm->maxcpus;
	*threads = vm->threads;
	*cores = vm->cores;
	*sockets = vm->sockets;
}
/*
 * Return the VM's 'maxcpus' field.
 */
uint16_t
vm_get_maxcpus(struct vm *vm)
{
	const uint16_t limit = vm->maxcpus;

	return (limit);
}
/*
 * Set the VM's CPU topology.
 *
 * The requested sockets * cores * threads must not exceed the VM's
 * 'maxcpus'; otherwise EINVAL is returned and nothing is modified.  The
 * 'maxcpus' argument is ignored.  Returns 0 on success.
 */
int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
	uint64_t ncpus;

	/*
	 * Compute the product in 64 bits: the uint16_t operands would
	 * otherwise be promoted to int, and 65535^3 overflows it, which is
	 * undefined behaviour and could let the bounds check pass.
	 */
	ncpus = (uint64_t)sockets * cores * threads;

	/* Ignore maxcpus. */
	if (ncpus > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}
static void
vm_cleanup(struct vm *vm, bool destroy)
{
@@ -452,23 +392,11 @@ vm_destroy(struct vm *vm)
free(vm, M_VMM);
}
int
vm_reinit(struct vm *vm)
void
vm_reset(struct vm *vm)
{
int error;
/*
* A virtual machine can be reset only if all vcpus are suspended.
*/
if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
vm_cleanup(vm, false);
vm_init(vm, false);
error = 0;
} else {
error = EBUSY;
}
return (error);
vm_cleanup(vm, false);
vm_init(vm, false);
}
const char *
@@ -765,33 +693,6 @@ vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
return (0);
}
/*
 * Request suspension of the whole VM for reason 'how'.
 *
 * Returns EINVAL if 'how' is outside (VM_SUSPEND_NONE, VM_SUSPEND_LAST),
 * EALREADY if vm->suspend was already non-zero, and 0 after recording the
 * reason and notifying every active vcpu.
 */
int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int cpu;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	/* Only the first requester gets to set the suspend reason. */
	if (!atomic_cmpset_int(&vm->suspend, 0, how)) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (cpu = 0; cpu < vm->maxcpus; cpu++) {
		if (!CPU_ISSET(cpu, &vm->active_cpus))
			continue;
		vcpu_notify_event(vm_vcpu(vm, cpu));
	}

	return (0);
}
void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
@@ -819,82 +720,6 @@ vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
vmexit->exitcode = VM_EXITCODE_DEBUG;
}
/*
 * Add 'vcpu' to its VM's set of active CPUs.  Returns EBUSY if it is
 * already a member, 0 otherwise.
 */
int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *owner;
	int id;

	owner = vcpu->vm;
	id = vcpu->vcpuid;

	if (CPU_ISSET(id, &owner->active_cpus))
		return (EBUSY);

	CPU_SET_ATOMIC(id, &owner->active_cpus);
	return (0);
}
/*
 * Add vcpus to the VM's 'debug_cpus' set and notify them.
 *
 * With a NULL 'vcpu', the debug set becomes a copy of the active set and
 * every active vcpu is notified.  Otherwise only 'vcpu' is added; EINVAL is
 * returned if it is not active.  Returns 0 on success.
 */
int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	int cpu;

	if (vcpu != NULL) {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
		return (0);
	}

	vm->debug_cpus = vm->active_cpus;
	for (cpu = 0; cpu < vm->maxcpus; cpu++) {
		if (!CPU_ISSET(cpu, &vm->active_cpus))
			continue;
		vcpu_notify_event(vm_vcpu(vm, cpu));
	}
	return (0);
}
/*
 * Remove vcpus from the VM's 'debug_cpus' set.
 *
 * With a NULL 'vcpu' the whole set is cleared.  Otherwise only 'vcpu' is
 * removed; EINVAL is returned if it was not in the set.  Returns 0 on
 * success.
 */
int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
		return (0);
	}

	if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
		return (EINVAL);

	CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	return (0);
}
int
vcpu_debugged(struct vcpu *vcpu)
{
return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}
/*
 * Return a by-value copy of the VM's 'active_cpus' set.
 */
cpuset_t
vm_active_cpus(struct vm *vm)
{
	cpuset_t snapshot = vm->active_cpus;

	return (snapshot);
}
/*
 * Return a by-value copy of the VM's 'debug_cpus' set.
 */
cpuset_t
vm_debug_cpus(struct vm *vm)
{
	cpuset_t snapshot = vm->debug_cpus;

	return (snapshot);
}
/*
 * Return a by-value copy of the VM's 'suspended_cpus' set.
 */
cpuset_t
vm_suspended_cpus(struct vm *vm)
{
	cpuset_t snapshot = vm->suspended_cpus;

	return (snapshot);
}
void *
vcpu_stats(struct vcpu *vcpu)
{
@@ -902,47 +727,6 @@ vcpu_stats(struct vcpu *vcpu)
return (vcpu->stats);
}
/*
 * Make a vcpu notice a pending event as soon as possible.  The caller
 * holds the vcpu lock.
 * - A sleeping vcpu thread is woken up.
 * - A vcpu running on another host cpu is sent an IPI so that it traps
 *   into the hypervisor.
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;

	if (vcpu->state != VCPU_RUNNING) {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
		return;
	}

	KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));

	/*
	 * If the 'vcpu' is running on 'curcpu' then it must be sending a
	 * notification to itself (e.g. SELF_IPI).  The pending event will
	 * be picked up when the vcpu transitions back to guest context, so
	 * an IPI is only needed for a remote host cpu.
	 */
	if (hostcpu != curcpu)
		ipi_cpu(hostcpu, vmm_ipinum);
}
/*
 * Locking wrapper: notify 'vcpu' of a pending event while holding its
 * lock.
 */
void
vcpu_notify_event(struct vcpu *vcpu)
{
	struct vcpu *target = vcpu;

	vcpu_lock(target);
	vcpu_notify_event_locked(target);
	vcpu_unlock(target);
}
struct vm_mem *
vm_mem(struct vm *vm)
{
@@ -984,71 +768,6 @@ save_guest_fpustate(struct vcpu *vcpu)
KASSERT(PCPU_GET(fpcurthread) == NULL,
("%s: fpcurthread set with guest registers", __func__));
}
/*
 * Transition 'vcpu' to 'newstate' with the vcpu lock held.
 *
 * When 'from_idle' is true the function first waits for the vcpu to reach
 * VCPU_IDLE, notifying it before each sleep; otherwise the vcpu must not
 * be idle.  Returns 0 on success or EBUSY if the requested transition is
 * not permitted.
 */
static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	bool allowed;

	vcpu_assert_locked(vcpu);

	if (!from_idle) {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	} else {
		/*
		 * State transitions from the vmmdev_ioctl() must always
		 * begin from the VCPU_IDLE state.  This guarantees that
		 * there is only a single ioctl() operating on a vcpu at any
		 * point.
		 */
		while (vcpu->state != VCPU_IDLE) {
			vcpu_notify_event_locked(vcpu);
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
		}
	}

	if (vcpu->state != VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_FROZEN:
		allowed = (newstate != VCPU_FROZEN);
		break;
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		allowed = (newstate == VCPU_FROZEN);
		break;
	default:
		allowed = false;
		break;
	}
	if (!allowed)
		return (EBUSY);

	vcpu->state = newstate;
	vcpu->hostcpu = (newstate == VCPU_RUNNING) ? curcpu : NOCPU;

	/* Wake anyone sleeping on the state while waiting for idle. */
	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}
static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
@@ -1110,32 +829,6 @@ vm_vcpu(struct vm *vm, int vcpuid)
return (vm->vcpu[vcpuid]);
}
/*
 * Locking wrapper around vcpu_set_state_locked(): takes the vcpu lock,
 * performs the transition and returns its result.
 */
int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int rv;

	vcpu_lock(vcpu);
	rv = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (rv);
}
enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
enum vcpu_state state;
vcpu_lock(vcpu);
state = vcpu->state;
if (hostcpu != NULL)
*hostcpu = vcpu->hostcpu;
vcpu_unlock(vcpu);
return (state);
}
int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{
-32
View File
@@ -141,38 +141,6 @@ vcpu_unlock_one(struct vcpu *vcpu)
vcpu_set_state(vcpu, VCPU_IDLE, false);
}
#ifndef __amd64__
/*
 * Lock every existing vcpu of the VM via vcpu_lock_one().  If any lock
 * attempt fails, the vcpus at lower indices are unlocked again and the
 * error is returned; otherwise returns 0.  'newstate' is not consulted
 * here.
 */
static int
vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate)
{
	struct vcpu *vcpu;
	uint16_t idx, undo, maxcpus;
	int error;

	maxcpus = vm_get_maxcpus(vm);
	error = 0;
	for (idx = 0; idx < maxcpus; idx++) {
		vcpu = vm_vcpu(vm, idx);
		if (vcpu == NULL)
			continue;
		error = vcpu_lock_one(vcpu);
		if (error)
			break;
	}
	if (!error)
		return (error);

	/* Undo the locks taken before the failing index. */
	for (undo = 0; undo < idx; undo++) {
		vcpu = vm_vcpu(vm, undo);
		if (vcpu != NULL)
			vcpu_unlock_one(vcpu);
	}
	return (error);
}
#endif
static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
+476
View File
@@ -0,0 +1,476 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*/
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <machine/smp.h>
#include <dev/vmm/vmm_vm.h>
/* Declare the "vmm" sysctl node under "_hw". */
SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, NULL);
/*
 * IPI vector used for vcpu notifications; exported read-only as the
 * "ipinum" sysctl under the node above.  Not static, so it has external
 * linkage.
 */
int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
"IPI vector used for vcpu notifications");
/*
 * Run the pending rendezvous callback on behalf of 'vcpu' if it was
 * requested for this vcpu and has not yet been run for it.  The rendezvous
 * mutex must be held.
 *
 * Returns true once every requested vcpu has completed the rendezvous (in
 * which case the rendezvous is torn down and sleepers are woken), false
 * otherwise.
 */
static bool
vm_rendezvous(struct vcpu *vcpu)
{
	struct vm *vm;
	int id;

	vm = vcpu->vm;

	mtx_assert(&vm->rendezvous_mtx, MA_OWNED);
	KASSERT(vm->rendezvous_func != NULL,
	    ("vm_rendezvous: no rendezvous pending"));

	/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
	CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus,
	    &vm->active_cpus);

	id = vcpu->vcpuid;
	if (CPU_ISSET(id, &vm->rendezvous_req_cpus) &&
	    !CPU_ISSET(id, &vm->rendezvous_done_cpus)) {
		(*vm->rendezvous_func)(vcpu, vm->rendezvous_arg);
		CPU_SET(id, &vm->rendezvous_done_cpus);
	}

	/* Still waiting on at least one other vcpu. */
	if (CPU_CMP(&vm->rendezvous_req_cpus, &vm->rendezvous_done_cpus) != 0)
		return (false);

	CPU_ZERO(&vm->rendezvous_req_cpus);
	vm->rendezvous_func = NULL;
	wakeup(&vm->rendezvous_func);
	return (true);
}
/*
 * Participate in a pending rendezvous until it completes.
 *
 * Loops while a rendezvous is pending: runs the callback via
 * vm_rendezvous() and otherwise sleeps on the rendezvous for up to 'hz'
 * ticks.  If the thread has a pending suspension AST, the rendezvous mutex
 * is dropped and thread_check_susp() is called; a non-zero result is
 * returned to the caller with the mutex released.  Returns 0 otherwise.
 */
int
vm_handle_rendezvous(struct vcpu *vcpu)
{
	struct thread *td;
	struct vm *vm;
	int error;

	vm = vcpu->vm;
	td = curthread;

	mtx_lock(&vm->rendezvous_mtx);
	while (vm->rendezvous_func != NULL && !vm_rendezvous(vcpu)) {
		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
		    "vmrndv", hz);
		if (!td_ast_pending(td, TDA_SUSPEND))
			continue;

		mtx_unlock(&vm->rendezvous_mtx);
		error = thread_check_susp(td, true);
		if (error != 0)
			return (error);
		mtx_lock(&vm->rendezvous_mtx);
	}
	mtx_unlock(&vm->rendezvous_mtx);
	return (0);
}
/*
 * Ask a non-idle vcpu to become idle and sleep on its state for up to
 * 'hz' ticks.  Called with the vcpu lock held; msleep_spin() is handed
 * that lock.
 */
static void
vcpu_wait_idle(struct vcpu *vcpu)
{
	KASSERT(vcpu->state != VCPU_IDLE, ("vcpu already idle"));

	/* Flag the request, then notify the vcpu of the pending event. */
	vcpu->reqidle = 1;
	vcpu_notify_event_locked(vcpu);

	msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
}
/*
 * Transition 'vcpu' to 'newstate' with the vcpu lock held.
 *
 * When 'from_idle' is true the function first waits for the vcpu to reach
 * VCPU_IDLE; otherwise the vcpu must not be idle.  Returns 0 on success or
 * EBUSY if the requested transition is not permitted.
 */
int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	bool allowed;

	vcpu_assert_locked(vcpu);

	if (!from_idle) {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	} else {
		/*
		 * State transitions from the vmmdev_ioctl() must always
		 * begin from the VCPU_IDLE state.  This guarantees that
		 * there is only a single ioctl() operating on a vcpu at any
		 * point.
		 */
		while (vcpu->state != VCPU_IDLE)
			vcpu_wait_idle(vcpu);
	}

	if (vcpu->state != VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_FROZEN:
		allowed = (newstate != VCPU_FROZEN);
		break;
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		allowed = (newstate == VCPU_FROZEN);
		break;
	default:
		allowed = false;
		break;
	}
	if (!allowed)
		return (EBUSY);

	vcpu->state = newstate;
	vcpu->hostcpu = (newstate == VCPU_RUNNING) ? curcpu : NOCPU;

	/* Wake anyone sleeping on the state in vcpu_wait_idle(). */
	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}
/*
 * Try to lock all of the vCPUs in the VM while taking care to avoid deadlocks
 * with vm_smp_rendezvous().
 *
 * Every existing vCPU is moved from VCPU_IDLE to 'newstate' (which must not
 * be VCPU_IDLE).  Returns 0 on success.  On failure the error from
 * vcpu_set_state_locked() is returned and every vCPU that had already been
 * transitioned is moved back to VCPU_IDLE.
 *
 * The complexity here suggests that the rendezvous mechanism needs a rethink.
 */
int
vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate)
{
	cpuset_t locked;
	struct vcpu *vcpu;
	int error, i, j;
	uint16_t maxcpus;

	KASSERT(newstate != VCPU_IDLE,
	    ("vcpu_set_state_all: invalid target state %d", newstate));

	error = 0;
	CPU_ZERO(&locked);
	maxcpus = vm->maxcpus;

	mtx_lock(&vm->rendezvous_mtx);
restart:
	if (vm->rendezvous_func != NULL) {
		/*
		 * If we have a pending rendezvous, then the initiator may be
		 * blocked waiting for other vCPUs to execute the callback. The
		 * current thread may be a vCPU thread so we must not block
		 * waiting for the initiator, otherwise we get a deadlock.
		 * Thus, execute the callback on behalf of any idle vCPUs.
		 */
		for (i = 0; i < maxcpus; i++) {
			vcpu = vm_vcpu(vm, i);
			if (vcpu == NULL)
				continue;
			vcpu_lock(vcpu);
			if (vcpu->state == VCPU_IDLE) {
				(void)vcpu_set_state_locked(vcpu, VCPU_FROZEN,
				    true);
				CPU_SET(i, &locked);
			}
			if (CPU_ISSET(i, &locked)) {
				/*
				 * We can safely execute the callback on this
				 * vCPU's behalf.
				 */
				vcpu_unlock(vcpu);
				(void)vm_rendezvous(vcpu);
				vcpu_lock(vcpu);
			}
			vcpu_unlock(vcpu);
		}
	}

	/*
	 * Now wait for remaining vCPUs to become idle. This may include the
	 * initiator of a rendezvous that is currently blocked on the rendezvous
	 * mutex.
	 */
	CPU_FOREACH_ISCLR(i, &locked) {
		if (i >= maxcpus)
			break;
		vcpu = vm_vcpu(vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_lock(vcpu);
		while (vcpu->state != VCPU_IDLE) {
			mtx_unlock(&vm->rendezvous_mtx);
			vcpu_wait_idle(vcpu);
			vcpu_unlock(vcpu);
			mtx_lock(&vm->rendezvous_mtx);
			if (vm->rendezvous_func != NULL)
				goto restart;
			vcpu_lock(vcpu);
		}
		error = vcpu_set_state_locked(vcpu, newstate, true);
		vcpu_unlock(vcpu);
		if (error != 0) {
			/*
			 * Roll back state changes.  Each vCPU recorded in
			 * 'locked' must be looked up by its own index; the
			 * vCPU that just failed was never transitioned and
			 * is not in the set.
			 */
			CPU_FOREACH_ISSET(j, &locked)
				(void)vcpu_set_state(vm_vcpu(vm, j),
				    VCPU_IDLE, false);
			break;
		}
		CPU_SET(i, &locked);
	}
	mtx_unlock(&vm->rendezvous_mtx);
	return (error);
}
/*
 * Locking wrapper around vcpu_set_state_locked(): takes the vCPU lock,
 * performs the transition and returns its result (0 or an errno value).
 */
int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int rc;

	vcpu_lock(vcpu);
	rc = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (rc);
}
enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
enum vcpu_state state;
vcpu_lock(vcpu);
state = vcpu->state;
if (hostcpu != NULL)
*hostcpu = vcpu->hostcpu;
vcpu_unlock(vcpu);
return (state);
}
/*
 * Make sure a vcpu notices a pending event as soon as possible:
 * - A sleeping vcpu thread is woken up.
 * - A vcpu running on another host CPU is sent an IPI so that it traps
 *   into the hypervisor.
 * - A vcpu in any other state needs no action here.
 */
void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;

	if (vcpu->state != VCPU_RUNNING) {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
		return;
	}

	KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));

	/*
	 * A 'vcpu' running on 'curcpu' must be notifying itself (e.g.
	 * SELF_IPI); the pending event is then picked up when the vcpu
	 * transitions back to guest context, so only remote CPUs get an IPI.
	 */
	if (hostcpu != curcpu)
		ipi_cpu(hostcpu, vmm_ipinum);
}
/*
 * Locking wrapper around vcpu_notify_event_locked(): takes the vCPU lock,
 * delivers the notification and drops the lock again.
 */
void
vcpu_notify_event(struct vcpu *vcpu)
{
vcpu_lock(vcpu);
vcpu_notify_event_locked(vcpu);
vcpu_unlock(vcpu);
}
/*
 * Return non-zero if this vCPU's id is set in its VM's 'debug_cpus' set.
 * The set is read without taking any lock.
 */
int
vcpu_debugged(struct vcpu *vcpu)
{
return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}
/*
 * Acquire the VM's 'vcpus_init_lock' exclusively.  Pair with
 * vm_unlock_vcpus().
 */
void
vm_lock_vcpus(struct vm *vm)
{
sx_xlock(&vm->vcpus_init_lock);
}
/*
 * Release the VM's 'vcpus_init_lock' taken by vm_lock_vcpus().
 */
void
vm_unlock_vcpus(struct vm *vm)
{
sx_unlock(&vm->vcpus_init_lock);
}
/*
 * Mark the VM as dying by setting 'vm->dying' while holding
 * 'vcpus_init_lock' exclusively.
 *
 * NOTE(review): presumably vm_alloc_vcpu() checks 'dying' under the same
 * lock and refuses to create new vCPUs; that code is not visible here.
 */
void
vm_disable_vcpu_creation(struct vm *vm)
{
sx_xlock(&vm->vcpus_init_lock);
vm->dying = true;
sx_xunlock(&vm->vcpus_init_lock);
}
/*
 * Return the VM's 'maxcpus' value.
 */
uint16_t
vm_get_maxcpus(struct vm *vm)
{
return (vm->maxcpus);
}
/*
 * Copy the VM's CPU topology (sockets, cores, threads) and its 'maxcpus'
 * value into the caller-supplied locations.  All four output pointers are
 * dereferenced unconditionally and must therefore be non-NULL.
 */
void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
uint16_t *threads, uint16_t *maxcpus)
{
*sockets = vm->sockets;
*cores = vm->cores;
*threads = vm->threads;
*maxcpus = vm->maxcpus;
}
/*
 * Set the VM's CPU topology.
 *
 * The 'maxcpus' argument is ignored; the VM's existing 'maxcpus' value is the
 * limit.  Returns EINVAL if sockets * cores * threads exceeds that limit,
 * otherwise stores the three values and returns 0.
 */
int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus __unused)
{
	/*
	 * Ignore maxcpus.
	 *
	 * Widen before multiplying: the uint16_t operands would otherwise be
	 * promoted to int, and the product of three 16-bit values can
	 * overflow int and wrap past the limit check.
	 */
	if ((uint64_t)sockets * cores * threads > vm->maxcpus)
		return (EINVAL);

	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}
/*
 * Request suspension of the whole VM for reason 'how'.
 *
 * Returns EINVAL if 'how' is not strictly between VM_SUSPEND_NONE and
 * VM_SUSPEND_LAST, EALREADY if 'vm->suspend' was already non-zero, and 0
 * once the reason has been recorded and the active vCPUs notified.
 */
int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	/* Only the first requester gets to record its suspend reason. */
	if (!atomic_cmpset_int(&vm->suspend, 0, how))
		return (EALREADY);

	/* Notify all active vcpus that they are now suspended. */
	for (int cpu = 0; cpu < vm->maxcpus; cpu++) {
		if (!CPU_ISSET(cpu, &vm->active_cpus))
			continue;
		vcpu_notify_event(vm_vcpu(vm, cpu));
	}

	return (0);
}
/*
 * Reset the VM via vm_reset().
 *
 * A virtual machine can be reset only if all vcpus are suspended, i.e. the
 * 'suspended_cpus' set equals the 'active_cpus' set; otherwise EBUSY is
 * returned and nothing is changed.  Returns 0 after a reset.
 */
int
vm_reinit(struct vm *vm)
{
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) != 0)
		return (EBUSY);

	vm_reset(vm);
	return (0);
}
/*
 * Add 'vcpu' to its VM's 'active_cpus' set.
 *
 * Returns EBUSY if the vCPU is already marked active, 0 otherwise.
 */
int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *owner;

	owner = vcpu->vm;
	if (CPU_ISSET(vcpu->vcpuid, &owner->active_cpus))
		return (EBUSY);

	CPU_SET_ATOMIC(vcpu->vcpuid, &owner->active_cpus);
	return (0);
}
/*
 * Put vCPUs into the VM's 'debug_cpus' set and notify them.
 *
 * With a non-NULL 'vcpu' only that vCPU is affected; EINVAL is returned if
 * it is not active.  With a NULL 'vcpu', 'debug_cpus' is overwritten with
 * the current 'active_cpus' set and every active vCPU is notified.
 * Returns 0 on success.
 */
int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu != NULL) {
		/* Single-vCPU request. */
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
		return (0);
	}

	/* Whole-VM request: every active vCPU becomes a debug vCPU. */
	vm->debug_cpus = vm->active_cpus;
	for (int cpu = 0; cpu < vm->maxcpus; cpu++) {
		if (!CPU_ISSET(cpu, &vm->active_cpus))
			continue;
		vcpu_notify_event(vm_vcpu(vm, cpu));
	}
	return (0);
}
/*
 * Remove vCPUs from the VM's 'debug_cpus' set.
 *
 * With a non-NULL 'vcpu' only that vCPU is removed; EINVAL is returned if
 * it is not currently in the set.  With a NULL 'vcpu' the whole set is
 * cleared.  Returns 0 on success.
 */
int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
		return (0);
	}

	if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
		return (EINVAL);

	CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	return (0);
}
/*
 * Return a by-value copy of the VM's 'active_cpus' set.  No lock is taken.
 */
cpuset_t
vm_active_cpus(struct vm *vm)
{
return (vm->active_cpus);
}
/*
 * Return a by-value copy of the VM's 'debug_cpus' set.  No lock is taken.
 */
cpuset_t
vm_debug_cpus(struct vm *vm)
{
return (vm->debug_cpus);
}
/*
 * Return a by-value copy of the VM's 'suspended_cpus' set.  No lock is
 * taken.
 */
cpuset_t
vm_suspended_cpus(struct vm *vm)
{
return (vm->suspended_cpus);
}
+47 -2
View File
@@ -9,6 +9,7 @@
#define _DEV_VMM_VM_H_
#ifdef _KERNEL
#include <sys/_cpuset.h>
#include <machine/vmm.h>
@@ -50,11 +51,16 @@ struct vcpu {
#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx))
#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED)
extern int vmm_ipinum;
int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state state, bool from_idle);
#ifdef __amd64__
int vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
bool from_idle);
int vcpu_set_state_all(struct vm *vm, enum vcpu_state state);
#endif
enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu);
void vcpu_notify_event(struct vcpu *vcpu);
void vcpu_notify_event_locked(struct vcpu *vcpu);
int vcpu_debugged(struct vcpu *vcpu);
static int __inline
vcpu_is_running(struct vcpu *vcpu, int *hostcpu)
@@ -74,6 +80,21 @@ vcpu_should_yield(struct vcpu *vcpu)
#endif
typedef void (*vm_rendezvous_func_t)(struct vcpu *vcpu, void *arg);
int vm_handle_rendezvous(struct vcpu *vcpu);
/*
* Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
* The rendezvous 'func(arg)' is not allowed to do anything that will
* cause the thread to be put to sleep.
*
* The caller cannot hold any locks when initiating the rendezvous.
*
* The implementation of this API may cause vcpus other than those specified
* by 'dest' to be stalled. The caller should not rely on any vcpus making
* forward progress when the rendezvous is in progress.
*/
int vm_smp_rendezvous(struct vcpu *vcpu, cpuset_t dest,
vm_rendezvous_func_t func, void *arg);
/*
* Initialization:
@@ -116,6 +137,30 @@ struct vm {
VMM_VM_MD_FIELDS;
};
int vm_create(const char *name, struct vm **retvm);
struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid);
void vm_destroy(struct vm *vm);
int vm_reinit(struct vm *vm);
void vm_reset(struct vm *vm);
void vm_lock_vcpus(struct vm *vm);
void vm_unlock_vcpus(struct vm *vm);
void vm_disable_vcpu_creation(struct vm *vm);
int vm_suspend(struct vm *vm, enum vm_suspend_how how);
int vm_activate_cpu(struct vcpu *vcpu);
int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu);
int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu);
cpuset_t vm_active_cpus(struct vm *vm);
cpuset_t vm_debug_cpus(struct vm *vm);
cpuset_t vm_suspended_cpus(struct vm *vm);
uint16_t vm_get_maxcpus(struct vm *vm);
void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
uint16_t *threads, uint16_t *maxcpus);
int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
uint16_t threads, uint16_t maxcpus);
#endif /* _KERNEL */
#endif /* !_DEV_VMM_VM_H_ */
+2 -1
View File
@@ -18,7 +18,8 @@ SRCS+= vmm.c \
vmm_dev_machdep.c \
vmm_instruction_emul.c \
vmm_mem.c \
vmm_stat.c
vmm_stat.c \
vmm_vm.c
.if ${MACHINE_CPUARCH} == "aarch64"
CFLAGS+= -I${SRCTOP}/sys/${MACHINE}/vmm/io
+1 -27
View File
@@ -160,34 +160,18 @@ DECLARE_VMMOPS_FUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min,
vm_offset_t max));
DECLARE_VMMOPS_FUNC(void, vmspace_free, (struct vmspace *vmspace));
int vm_create(const char *name, struct vm **retvm);
struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid);
void vm_disable_vcpu_creation(struct vm *vm);
void vm_lock_vcpus(struct vm *vm);
void vm_unlock_vcpus(struct vm *vm);
void vm_destroy(struct vm *vm);
int vm_reinit(struct vm *vm);
const char *vm_name(struct vm *vm);
uint16_t vm_get_maxcpus(struct vm *vm);
void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
uint16_t *threads, uint16_t *maxcpus);
int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
uint16_t threads, uint16_t maxcpus);
int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval);
int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val);
int vm_run(struct vcpu *vcpu);
int vm_suspend(struct vm *vm, enum vm_suspend_how how);
void* vm_get_cookie(struct vm *vm);
void *vm_get_cookie(struct vm *vm);
int vcpu_vcpuid(struct vcpu *vcpu);
void *vcpu_get_cookie(struct vcpu *vcpu);
struct vm *vcpu_vm(struct vcpu *vcpu);
struct vcpu *vm_vcpu(struct vm *vm, int cpu);
int vm_get_capability(struct vcpu *vcpu, int type, int *val);
int vm_set_capability(struct vcpu *vcpu, int type, int val);
int vm_activate_cpu(struct vcpu *vcpu);
int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu);
int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu);
int vm_inject_exception(struct vcpu *vcpu, uint64_t scause);
int vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr);
int vm_assert_irq(struct vm *vm, uint32_t irq);
@@ -197,13 +181,8 @@ int vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
struct vm_exit *vm_exitinfo(struct vcpu *vcpu);
void vm_exit_suspended(struct vcpu *vcpu, uint64_t pc);
void vm_exit_debug(struct vcpu *vcpu, uint64_t pc);
void vm_exit_rendezvous(struct vcpu *vcpu, uint64_t pc);
void vm_exit_astpending(struct vcpu *vcpu, uint64_t pc);
cpuset_t vm_active_cpus(struct vm *vm);
cpuset_t vm_debug_cpus(struct vm *vm);
cpuset_t vm_suspended_cpus(struct vm *vm);
static __inline int
vcpu_rendezvous_pending(struct vm_eventinfo *info)
{
@@ -218,14 +197,9 @@ vcpu_suspended(struct vm_eventinfo *info)
return (*info->sptr);
}
int vcpu_debugged(struct vcpu *vcpu);
void *vcpu_stats(struct vcpu *vcpu);
void vcpu_notify_event(struct vcpu *vcpu);
struct vm_mem *vm_mem(struct vm *vm);
enum vm_reg_name vm_segment_name(int seg_encoding);
#endif /* _KERNEL */
#define VM_DIR_READ 0
+5 -312
View File
@@ -45,7 +45,6 @@
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
@@ -83,14 +82,6 @@ static MALLOC_DEFINE(M_VMM, "vmm", "vmm");
/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
"IPI vector used for vcpu notifications");
static void vcpu_notify_event_locked(struct vcpu *vcpu);
/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
@@ -179,14 +170,6 @@ vm_init(struct vm *vm, bool create)
}
}
void
vm_disable_vcpu_creation(struct vm *vm)
{
sx_xlock(&vm->vcpus_init_lock);
vm->dying = true;
sx_xunlock(&vm->vcpus_init_lock);
}
struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
@@ -217,18 +200,6 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid)
return (vcpu);
}
void
vm_lock_vcpus(struct vm *vm)
{
sx_xlock(&vm->vcpus_init_lock);
}
void
vm_unlock_vcpus(struct vm *vm)
{
sx_unlock(&vm->vcpus_init_lock);
}
int
vm_create(const char *name, struct vm **retvm)
{
@@ -242,6 +213,7 @@ vm_create(const char *name, struct vm **retvm)
return (error);
}
strcpy(vm->name, name);
mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
sx_init(&vm->vcpus_init_lock, "vm vcpus");
vm->sockets = 1;
@@ -258,35 +230,6 @@ vm_create(const char *name, struct vm **retvm)
return (0);
}
void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
uint16_t *threads, uint16_t *maxcpus)
{
*sockets = vm->sockets;
*cores = vm->cores;
*threads = vm->threads;
*maxcpus = vm->maxcpus;
}
uint16_t
vm_get_maxcpus(struct vm *vm)
{
return (vm->maxcpus);
}
int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
uint16_t threads, uint16_t maxcpus)
{
/* Ignore maxcpus. */
if ((sockets * cores * threads) > vm->maxcpus)
return (EINVAL);
vm->sockets = sockets;
vm->cores = cores;
vm->threads = threads;
return(0);
}
static void
vm_cleanup(struct vm *vm, bool destroy)
{
@@ -318,29 +261,15 @@ vm_cleanup(struct vm *vm, bool destroy)
void
vm_destroy(struct vm *vm)
{
vm_cleanup(vm, true);
free(vm, M_VMM);
}
int
vm_reinit(struct vm *vm)
void
vm_reset(struct vm *vm)
{
int error;
/*
* A virtual machine can be reset only if all vcpus are suspended.
*/
if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
vm_cleanup(vm, false);
vm_init(vm, false);
error = 0;
} else {
error = EBUSY;
}
return (error);
vm_cleanup(vm, false);
vm_init(vm, false);
}
const char *
@@ -437,33 +366,6 @@ vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
return (0);
}
int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
int i;
if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
return (EINVAL);
if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
VM_CTR2(vm, "virtual machine already suspended %d/%d",
vm->suspend, how);
return (EALREADY);
}
VM_CTR1(vm, "virtual machine successfully suspended %d", how);
/*
* Notify all active vcpus that they are now suspended.
*/
for (i = 0; i < vm->maxcpus; i++) {
if (CPU_ISSET(i, &vm->active_cpus))
vcpu_notify_event(vm_vcpu(vm, i));
}
return (0);
}
void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
@@ -491,82 +393,6 @@ vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
vmexit->exitcode = VM_EXITCODE_DEBUG;
}
int
vm_activate_cpu(struct vcpu *vcpu)
{
struct vm *vm = vcpu->vm;
if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
return (EBUSY);
CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
return (0);
}
int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
if (vcpu == NULL) {
vm->debug_cpus = vm->active_cpus;
for (int i = 0; i < vm->maxcpus; i++) {
if (CPU_ISSET(i, &vm->active_cpus))
vcpu_notify_event(vm_vcpu(vm, i));
}
} else {
if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
return (EINVAL);
CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
vcpu_notify_event(vcpu);
}
return (0);
}
int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{
if (vcpu == NULL) {
CPU_ZERO(&vm->debug_cpus);
} else {
if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
return (EINVAL);
CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
}
return (0);
}
int
vcpu_debugged(struct vcpu *vcpu)
{
return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}
cpuset_t
vm_active_cpus(struct vm *vm)
{
return (vm->active_cpus);
}
cpuset_t
vm_debug_cpus(struct vm *vm)
{
return (vm->debug_cpus);
}
cpuset_t
vm_suspended_cpus(struct vm *vm)
{
return (vm->suspended_cpus);
}
void *
vcpu_stats(struct vcpu *vcpu)
{
@@ -574,47 +400,6 @@ vcpu_stats(struct vcpu *vcpu)
return (vcpu->stats);
}
/*
* This function is called to ensure that a vcpu "sees" a pending event
* as soon as possible:
* - If the vcpu thread is sleeping then it is woken up.
* - If the vcpu is running on a different host_cpu then an IPI will be directed
* to the host_cpu to cause the vcpu to trap into the hypervisor.
*/
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
int hostcpu;
hostcpu = vcpu->hostcpu;
if (vcpu->state == VCPU_RUNNING) {
KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
if (hostcpu != curcpu) {
ipi_cpu(hostcpu, vmm_ipinum);
} else {
/*
* If the 'vcpu' is running on 'curcpu' then it must
* be sending a notification to itself (e.g. SELF_IPI).
* The pending event will be picked up when the vcpu
* transitions back to guest context.
*/
}
} else {
KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
"with hostcpu %d", vcpu->state, hostcpu));
if (vcpu->state == VCPU_SLEEPING)
wakeup_one(vcpu);
}
}
void
vcpu_notify_event(struct vcpu *vcpu)
{
vcpu_lock(vcpu);
vcpu_notify_event_locked(vcpu);
vcpu_unlock(vcpu);
}
struct vm_mem *
vm_mem(struct vm *vm)
{
@@ -655,72 +440,6 @@ save_guest_fpustate(struct vcpu *vcpu)
("%s: fpcurthread set with guest registers", __func__));
}
static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
bool from_idle)
{
int error;
vcpu_assert_locked(vcpu);
/*
* State transitions from the vmmdev_ioctl() must always begin from
* the VCPU_IDLE state. This guarantees that there is only a single
* ioctl() operating on a vcpu at any point.
*/
if (from_idle) {
while (vcpu->state != VCPU_IDLE) {
vcpu_notify_event_locked(vcpu);
msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
}
} else {
KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
"vcpu idle state"));
}
if (vcpu->state == VCPU_RUNNING) {
KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
"mismatch for running vcpu", curcpu, vcpu->hostcpu));
} else {
KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
"vcpu that is not running", vcpu->hostcpu));
}
/*
* The following state transitions are allowed:
* IDLE -> FROZEN -> IDLE
* FROZEN -> RUNNING -> FROZEN
* FROZEN -> SLEEPING -> FROZEN
*/
switch (vcpu->state) {
case VCPU_IDLE:
case VCPU_RUNNING:
case VCPU_SLEEPING:
error = (newstate != VCPU_FROZEN);
break;
case VCPU_FROZEN:
error = (newstate == VCPU_FROZEN);
break;
default:
error = 1;
break;
}
if (error)
return (EBUSY);
vcpu->state = newstate;
if (newstate == VCPU_RUNNING)
vcpu->hostcpu = curcpu;
else
vcpu->hostcpu = NOCPU;
if (newstate == VCPU_IDLE)
wakeup(&vcpu->state);
return (0);
}
static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
@@ -787,32 +506,6 @@ vm_vcpu(struct vm *vm, int vcpuid)
return (vm->vcpu[vcpuid]);
}
int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
int error;
vcpu_lock(vcpu);
error = vcpu_set_state_locked(vcpu, newstate, from_idle);
vcpu_unlock(vcpu);
return (error);
}
enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
enum vcpu_state state;
vcpu_lock(vcpu);
state = vcpu->state;
if (hostcpu != NULL)
*hostcpu = vcpu->hostcpu;
vcpu_unlock(vcpu);
return (state);
}
int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{
+1
View File
@@ -48,6 +48,7 @@
#include <machine/vmm_instruction_emul.h>
#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_vm.h>
MALLOC_DEFINE(M_APLIC, "RISC-V VMM APLIC", "RISC-V AIA APLIC");
+2
View File
@@ -36,6 +36,8 @@
#include <machine/sbi.h>
#include <dev/vmm/vmm_vm.h>
#include "riscv.h"
#include "vmm_fence.h"
+2
View File
@@ -39,6 +39,8 @@
#include <dev/ofw/ofw_bus_subr.h>
#include <dev/ofw/openfirm.h>
#include <dev/vmm/vmm_vm.h>
#include "riscv.h"
#define VTIMER_DEFAULT_FREQ 1000000