hwpstate_amd: Expose nodes as much as possible in legacy pstate

Reviewed by:    olce
Sponsored by:   The FreeBSD Foundation
Differential Revision:  https://reviews.freebsd.org/D55606
This commit is contained in:
ShengYi Hung
2026-04-08 15:45:11 +08:00
parent a71fea51ce
commit 095cbb1bb7
+91 -65
View File
@@ -552,6 +552,20 @@ hwpstate_amd_iscale(int val, int div)
return (val);
}
/*
 * Read MSR_AMD_10H_11H_LIMIT through x86_msr_op(), targeting 'cpu', and
 * store the raw register value through 'msr'.  Whatever x86_msr_op()
 * returns is deliberately discarded.
 */
static void
hwpstate_pstate_read_limit(int cpu, uint64_t *msr)
{
	const unsigned int op = MSR_OP_READ | MSR_OP_RENDEZVOUS_ONE |
	    MSR_OP_CPUID(cpu);

	(void)x86_msr_op(MSR_AMD_10H_11H_LIMIT, op, 0, msr);
}
/*
 * Read MSR_AMD_10H_11H_STATUS through x86_msr_op(), targeting 'cpu', and
 * store the raw register value through 'msr'.  Whatever x86_msr_op()
 * returns is deliberately discarded.
 */
static void
hwpstate_pstate_read_status(int cpu, uint64_t *msr)
{
	const unsigned int op = MSR_OP_READ | MSR_OP_RENDEZVOUS_ONE |
	    MSR_OP_CPUID(cpu);

	(void)x86_msr_op(MSR_AMD_10H_11H_STATUS, op, 0, msr);
}
/*
* Go to Px-state on all cpus, considering the limit register (if so
* configured).
@@ -561,11 +575,13 @@ hwpstate_goto_pstate(device_t dev, int id)
{
sbintime_t sbt;
uint64_t msr;
int cpu, i, j, limit;
int cpu, j, limit;
cpu = cpu_get_pcpu(dev)->pc_cpuid;
if (hwpstate_pstate_limit) {
/* get the current pstate limit */
msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
hwpstate_pstate_read_limit(cpu, &msr);
limit = AMD_10H_11H_GET_PSTATE_LIMIT(msr);
if (limit > id) {
HWPSTATE_DEBUG(dev, "Restricting requested P%d to P%d "
@@ -574,53 +590,31 @@ hwpstate_goto_pstate(device_t dev, int id)
}
}
cpu = curcpu;
HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, cpu);
/* Go To Px-state */
wrmsr(MSR_AMD_10H_11H_CONTROL, id);
/*
* We are going to the same Px-state on all cpus.
* Probably should take _PSD into account.
*/
CPU_FOREACH(i) {
if (i == cpu)
continue;
/* Bind to each cpu. */
thread_lock(curthread);
sched_bind(curthread, i);
thread_unlock(curthread);
HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, i);
/* Go To Px-state */
wrmsr(MSR_AMD_10H_11H_CONTROL, id);
}
x86_msr_op(MSR_AMD_10H_11H_CONTROL,
MSR_OP_WRITE | MSR_OP_RENDEZVOUS_ONE | MSR_OP_CPUID(cpu), id, NULL);
/*
* Verify whether each core is in the requested P-state.
*/
if (hwpstate_verify) {
CPU_FOREACH(i) {
thread_lock(curthread);
sched_bind(curthread, i);
thread_unlock(curthread);
/* wait loop (100*100 usec is enough ?) */
for (j = 0; j < 100; j++) {
/* get the result. not assure msr=id */
msr = rdmsr(MSR_AMD_10H_11H_STATUS);
if (msr == id)
break;
sbt = SBT_1MS / 10;
tsleep_sbt(dev, PZERO, "pstate_goto", sbt,
sbt >> tc_precexp, 0);
}
HWPSTATE_DEBUG(dev, "result: P%d-state on cpu%d\n",
(int)msr, i);
if (msr != id) {
HWPSTATE_DEBUG(dev,
"error: loop is not enough.\n");
return (ENXIO);
}
/* wait loop (100*100 usec is enough ?) */
for (j = 0; j < 100; j++) {
/* get the result. not assure msr=id */
hwpstate_pstate_read_status(cpu, &msr);
if (msr == id)
break;
sbt = SBT_1MS / 10;
tsleep_sbt(dev, PZERO, "pstate_goto", sbt,
sbt >> tc_precexp, 0);
}
HWPSTATE_DEBUG(dev, "result: P%d-state on cpu%d\n", (int)msr,
cpu);
if (msr != id) {
HWPSTATE_DEBUG(dev, "error: loop is not enough.\n");
return (ENXIO);
}
}
@@ -670,7 +664,6 @@ hwpstate_get_cppc(device_t dev, struct cf_setting *cf)
pc = cpu_get_pcpu(dev);
if (pc == NULL)
return (ENXIO);
memset(cf, CPUFREQ_VAL_UNKNOWN, sizeof(*cf));
cf->dev = dev;
if ((ret = cpu_est_clockrate(pc->pc_cpuid, &rate)))
@@ -685,13 +678,14 @@ hwpstate_get_pstate(device_t dev, struct cf_setting *cf)
struct hwpstate_softc *sc;
struct hwpstate_setting set;
uint64_t msr;
int cpu;
sc = device_get_softc(dev);
msr = rdmsr(MSR_AMD_10H_11H_STATUS);
cpu = cpu_get_pcpu(dev)->pc_cpuid;
hwpstate_pstate_read_status(cpu, &msr);
if (msr >= sc->cfnum)
return (EINVAL);
set = sc->hwpstate_settings[msr];
cf->freq = set.freq;
cf->volts = set.volts;
cf->power = set.power;
@@ -967,8 +961,10 @@ hwpstate_probe_pstate(device_t dev)
device_t perf_dev;
int error, type;
uint64_t msr;
int cpu;
sc = device_get_softc(dev);
cpu = cpu_get_pcpu(dev)->pc_cpuid;
/*
* Check if acpi_perf has INFO only flag.
*/
@@ -985,15 +981,15 @@ hwpstate_probe_pstate(device_t dev)
*/
HWPSTATE_DEBUG(dev, "acpi_perf will take care of pstate transitions.\n");
return (ENXIO);
} else {
/*
* If acpi_perf has INFO_ONLY flag, (_PCT has FFixedHW)
* we can get _PSS info from acpi_perf
* without going into ACPI.
*/
HWPSTATE_DEBUG(dev, "going to fetch info from acpi_perf\n");
error = hwpstate_get_info_from_acpi_perf(dev, perf_dev);
}
/*
* If acpi_perf has INFO_ONLY flag, (_PCT has FFixedHW)
* we can get _PSS info from acpi_perf
* without going into ACPI.
*/
HWPSTATE_DEBUG(dev,
"going to fetch info from acpi_perf\n");
error = hwpstate_get_info_from_acpi_perf(dev, perf_dev);
}
}
@@ -1002,7 +998,7 @@ hwpstate_probe_pstate(device_t dev)
* Now we get _PSS info from acpi_perf without error.
* Let's check it.
*/
msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
hwpstate_pstate_read_limit(cpu, &msr);
if (sc->cfnum != 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr)) {
HWPSTATE_DEBUG(dev, "MSR (%jd) and ACPI _PSS (%d)"
" count mismatch\n", (intmax_t)msr, sc->cfnum);
@@ -1042,15 +1038,8 @@ hwpstate_probe(device_t dev)
sc->flags |= HWPFL_USE_CPPC;
device_set_desc(dev,
"AMD Collaborative Processor Performance Control (CPPC)");
} else {
/*
* No CPPC support. Only keep hwpstate0, it goes well with
* acpi_throttle.
*/
if (device_get_unit(dev) != 0)
return (ENXIO);
} else
device_set_desc(dev, "Cool`n'Quiet 2.0");
}
sc->dev = dev;
if ((sc->flags & HWPFL_USE_CPPC) != 0) {
@@ -1119,22 +1108,59 @@ hwpstate_attach(device_t dev)
return (cpufreq_register(dev));
}
/*
 * Request block handed to hwpstate_pstate_read_settings_cb().
 */
struct hwpstate_pstate_read_settings_cb {
	struct hwpstate_softc *sc;	/* in: supplies cfnum */
	uint64_t *vals;			/* out: room for sc->cfnum entries */
	int err;			/* out: 0, or first rdmsr_safe() error */
};

/*
 * Read the first sc->cfnum P-state configuration MSRs (starting at
 * MSR_AMD_10H_11H_CONFIG) into req->vals[] on the CPU this callback runs
 * on.
 *
 * req->err is 0 on success, otherwise the error returned by the first
 * failing rdmsr_safe().  On failure every entry from the failing index
 * onwards is set to 0, so the output array never holds indeterminate
 * data even if the caller does not look at req->err; a zero entry has
 * bit 63 clear.
 */
static void
hwpstate_pstate_read_settings_cb(void *args)
{
	struct hwpstate_pstate_read_settings_cb *req = args;
	int i;

	req->err = 0;
	for (i = 0; i < req->sc->cfnum; i++) {
		req->err = rdmsr_safe(MSR_AMD_10H_11H_CONFIG + i,
		    &req->vals[i]);
		if (req->err != 0)
			break;
	}

	/* Do not leave unread slots uninitialized after a failed read. */
	if (req->err != 0) {
		for (; i < req->sc->cfnum; i++)
			req->vals[i] = 0;
	}
}
static int
hwpstate_pstate_read_settings(struct hwpstate_softc *sc, uint64_t vals[])
{
struct hwpstate_pstate_read_settings_cb req;
device_t dev;
req.sc = sc;
req.vals = vals;
dev = sc->dev;
smp_rendezvous_cpu(cpu_get_pcpu(dev)->pc_cpuid,
smp_no_rendezvous_barrier, hwpstate_pstate_read_settings_cb,
smp_no_rendezvous_barrier, &req);
return (req.err);
}
static int
hwpstate_get_info_from_msr(device_t dev)
{
struct hwpstate_softc *sc;
struct hwpstate_setting *hwpstate_set;
uint64_t msr;
uint64_t state_settings[AMD_10H_11H_MAX_STATES], msr;
int family, i, fid, did;
family = CPUID_TO_FAMILY(cpu_id);
sc = device_get_softc(dev);
/* Get pstate count */
msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
hwpstate_pstate_read_limit(cpu_get_pcpu(dev)->pc_cpuid, &msr);
sc->cfnum = 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr);
hwpstate_set = sc->hwpstate_settings;
hwpstate_pstate_read_settings(sc, state_settings);
for (i = 0; i < sc->cfnum; i++) {
msr = rdmsr(MSR_AMD_10H_11H_CONFIG + i);
msr = state_settings[i];
if ((msr & ((uint64_t)1 << 63)) == 0) {
HWPSTATE_DEBUG(dev, "msr is not valid.\n");
return (ENXIO);