hwpmc_ibs: Add Zen6 IBS ctl2 filters and alternate disable
Add kernel and userland support for Zen6 IBS extensions per AMD pub 69205 (rev 1.00, March 2026): alternate fetch/op disable via ctl2[0], fetch latency filtering, virtual address bit 63 filtering, and streaming-store filtering. Decode the new IbsOpData2 StrmSt and RmtSocket bits. Update libpmc, pmcstat and manpage. Pre-Zen6 systems work unchanged with ibs_ctl2 == 0. Signed-off-by: Andre Silva <andasilv@amd.com> Reviewed by: Ali Mashtizadeh <ali@mashtizadeh.com>, mhorne Sponsored by: AMD Differential Revision: https://reviews.freebsd.org/D56914
This commit is contained in:
committed by
Mitchell Horne
parent
ff46acfd52
commit
0aa4c25f3e
+53
-2
@@ -700,13 +700,14 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
|
||||
struct pmc_op_pmcallocate *pmc_config)
|
||||
{
|
||||
char *e, *p, *q;
|
||||
uint64_t ctl, ldlat;
|
||||
uint64_t ctl, ctl2, ldlat, fetchlat;
|
||||
u_int ibs_features;
|
||||
u_int regs[4];
|
||||
|
||||
pmc_config->pm_caps |=
|
||||
(PMC_CAP_SYSTEM | PMC_CAP_EDGE | PMC_CAP_PRECISE);
|
||||
pmc_config->pm_md.pm_ibs.ibs_ctl = 0;
|
||||
pmc_config->pm_md.pm_ibs.ibs_ctl2 = 0;
|
||||
|
||||
/* setup parsing tables */
|
||||
switch (pe) {
|
||||
@@ -735,6 +736,7 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
|
||||
|
||||
/* parse parameters */
|
||||
ctl = 0;
|
||||
ctl2 = 0;
|
||||
if (pe == PMC_EV_IBS_FETCH) {
|
||||
while ((p = strsep(&ctrspec, ",")) != NULL) {
|
||||
if (KWMATCH(p, "l3miss")) {
|
||||
@@ -744,6 +746,37 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
|
||||
ctl |= IBS_FETCH_CTL_L3MISSONLY;
|
||||
} else if (KWMATCH(p, "randomize")) {
|
||||
ctl |= IBS_FETCH_CTL_RANDOMIZE;
|
||||
} else if (KWPREFIXMATCH(p, "fetchlat=")) {
|
||||
if ((ibs_features & CPUID_IBSID_FETCHLATFILTERING) == 0)
|
||||
return (-1);
|
||||
|
||||
q = strchr(p, '=');
|
||||
if (*++q == '\0')
|
||||
return (-1);
|
||||
|
||||
fetchlat = strtoull(q, &e, 0);
|
||||
if (e == q || *e != '\0')
|
||||
return (-1);
|
||||
|
||||
if (fetchlat < IBS_FETCH_CTL2_LAT_MIN ||
|
||||
fetchlat > IBS_FETCH_CTL2_LAT_MAX)
|
||||
return (-1);
|
||||
if ((fetchlat % IBS_FETCH_CTL2_LAT_STEP) != 0)
|
||||
return (-1);
|
||||
|
||||
/* clear prior threshold */
|
||||
ctl2 &= ~IBS_FETCH_CTL2_LATFILTERMASK;
|
||||
ctl2 |= IBS_FETCH_CTL2_LAT_TO_CTL(fetchlat);
|
||||
} else if (KWMATCH(p, "usr")) {
|
||||
if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) == 0)
|
||||
return (-1);
|
||||
|
||||
pmc_config->pm_caps |= PMC_CAP_USER;
|
||||
} else if (KWMATCH(p, "os")) {
|
||||
if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) == 0)
|
||||
return (-1);
|
||||
|
||||
pmc_config->pm_caps |= PMC_CAP_SYSTEM;
|
||||
} else {
|
||||
return (-1);
|
||||
}
|
||||
@@ -783,6 +816,9 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
|
||||
*/
|
||||
if (ldlat < 128 || ldlat > 2048)
|
||||
return (-1);
|
||||
|
||||
/* clear prior ldlat threshold */
|
||||
ctl &= ~IBS_OP_CTL_LDLATTRSHMASK;
|
||||
ctl |= IBS_OP_CTL_LDLAT_TO_CTL(ldlat);
|
||||
ctl |= IBS_OP_CTL_L3MISSONLY | IBS_OP_CTL_LATFLTEN;
|
||||
} else if (KWMATCH(p, "opcount")) {
|
||||
@@ -790,6 +826,21 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
|
||||
return (-1);
|
||||
|
||||
ctl |= IBS_OP_CTL_COUNTERCONTROL;
|
||||
} else if (KWMATCH(p, "usr")) {
|
||||
if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) == 0)
|
||||
return (-1);
|
||||
|
||||
pmc_config->pm_caps |= PMC_CAP_USER;
|
||||
} else if (KWMATCH(p, "os")) {
|
||||
if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) == 0)
|
||||
return (-1);
|
||||
|
||||
pmc_config->pm_caps |= PMC_CAP_SYSTEM;
|
||||
} else if (KWMATCH(p, "streamstore")) {
|
||||
if ((ibs_features & CPUID_IBSID_STRMSTANDRMTSOCKET) == 0)
|
||||
return (-1);
|
||||
|
||||
ctl2 |= IBS_OP_CTL2_STRMSTFILTER;
|
||||
} else {
|
||||
return (-1);
|
||||
}
|
||||
@@ -806,8 +857,8 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
|
||||
ctl |= IBS_OP_INTERVAL_TO_CTL(pmc_config->pm_count);
|
||||
}
|
||||
|
||||
|
||||
pmc_config->pm_md.pm_ibs.ibs_ctl |= ctl;
|
||||
pmc_config->pm_md.pm_ibs.ibs_ctl2 |= ctl2;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
+73
-4
@@ -89,7 +89,7 @@ AMD IBS supports the following capabilities.
|
||||
.It PMC_CAP_SYSTEM Ta Yes
|
||||
.It PMC_CAP_TAGGING Ta \&No
|
||||
.It PMC_CAP_THRESHOLD Ta \&No
|
||||
.It PMC_CAP_USER Ta \&No
|
||||
.It PMC_CAP_USER Ta Yes (Zen 6)
|
||||
.It PMC_CAP_WRITE Ta \&No
|
||||
.El
|
||||
.Pp
|
||||
@@ -97,25 +97,91 @@ By default AMD IBS enables the edge, interrupt, system and precise flags.
|
||||
.Ss Event Qualifiers
|
||||
Event specifiers for AMD IBS can have the following optional
|
||||
qualifiers:
|
||||
.Bl -tag -width "ldlat=value"
|
||||
.Bl -tag -width "fetchlat=value"
|
||||
.It Li usr
|
||||
Valid for both
|
||||
.Ar ibs-fetch
|
||||
and
|
||||
.Ar ibs-op
|
||||
events.
|
||||
Configure the counter to only sample user-mode events.
|
||||
Requires Zen 6 IBS extensions
|
||||
.Pq CPUID Fn Fn8000_001B
|
||||
.Va EAX[IbsAddrBit63Filtering] ,
|
||||
and is rejected when the CPU does not advertise support.
|
||||
.It Li os
|
||||
Valid for both
|
||||
.Ar ibs-fetch
|
||||
and
|
||||
.Ar ibs-op
|
||||
events.
|
||||
Configure the counter to only sample kernel-mode events.
|
||||
Requires Zen 6 IBS extensions
|
||||
.Pq CPUID Fn Fn8000_001B
|
||||
.Va EAX[IbsAddrBit63Filtering] ,
|
||||
and is rejected when the CPU does not advertise support.
|
||||
.It Li fetchlat= Ns Ar value
|
||||
Valid only for
|
||||
.Ar ibs-fetch
|
||||
events.
|
||||
Configure the counter to only sample fetches whose latency is greater than or
|
||||
equal to
|
||||
.Ar value
|
||||
core clock cycles.
|
||||
The valid range is 128 to 1920 in steps of 128.
|
||||
Requires Zen 6 IBS extensions
|
||||
.Pq CPUID Fn Fn8000_001B
|
||||
.Va EAX[IbsFetchLatencyFiltering] ,
|
||||
and is rejected when the CPU does not advertise support.
|
||||
.It Li l3miss
|
||||
Valid for both
|
||||
.Ar ibs-fetch
|
||||
and
|
||||
.Ar ibs-op
|
||||
events.
|
||||
Configure IBS to only sample if an L3 miss occurred.
|
||||
.It Li ldlat= Ns Ar value
|
||||
Valid only for
|
||||
.Ar ibs-op
|
||||
events.
|
||||
Configure the counter to only sample events with load latencies above
|
||||
.Ar ldlat .
|
||||
IBS only supports filtering latencies that are a multiple of 128 and between
|
||||
128 and 2048.
|
||||
Load latency filtering can only be used with ibs-op events and imply the
|
||||
l3miss qualifier.
|
||||
On pre-Zen 6 hardware this qualifier implies the
|
||||
.Li l3miss
|
||||
qualifier; on Zen 6 and later, latency-only filtering without
|
||||
.Li l3miss
|
||||
is permitted.
|
||||
.It Li opcount
|
||||
Valid only for
|
||||
.Ar ibs-op
|
||||
events.
|
||||
Count ops rather than cycles.
|
||||
.It Li randomize
|
||||
Valid only for
|
||||
.Ar ibs-fetch
|
||||
events.
|
||||
Randomize the sampling rate.
|
||||
.It Li streamstore
|
||||
Valid only for
|
||||
.Ar ibs-op
|
||||
events.
|
||||
Configure the counter to only sample streaming
|
||||
.Pq non-temporal
|
||||
store operations.
|
||||
Requires Zen 6 IBS extensions
|
||||
.Pq CPUID Fn Fn8000_001B
|
||||
.Va EAX[IbsStrmStAndRmtSocket] ,
|
||||
and is rejected when the CPU does not advertise support.
|
||||
.El
|
||||
.Ss AMD IBS Events Specifiers
|
||||
The IBS event class provides only two event specifiers:
|
||||
.Bl -tag -width indent
|
||||
.It Li ibs-fetch Xo
|
||||
.Op ,usr
|
||||
.Op ,os
|
||||
.Op ,fetchlat= Ns Ar value
|
||||
.Op ,l3miss
|
||||
.Op ,randomize
|
||||
.Xc
|
||||
@@ -124,9 +190,12 @@ The
|
||||
.Ar randomize
|
||||
qualifier randomly sets the bottom four bits of the sample rate.
|
||||
.It Li ibs-op Xo
|
||||
.Op ,usr
|
||||
.Op ,os
|
||||
.Op ,l3miss
|
||||
.Op ,ldlat= Ns Ar ldlat
|
||||
.Op ,opcount
|
||||
.Op ,streamstore
|
||||
.Xc
|
||||
Collect performance samples during instruction execution.
|
||||
The
|
||||
|
||||
+193
-21
@@ -60,9 +60,15 @@ struct ibs_descr {
|
||||
static uint64_t ibs_features;
|
||||
static uint64_t ibs_fetch_allowed_mask;
|
||||
static uint64_t ibs_op_allowed_mask;
|
||||
static uint64_t ibs_fetch_ctl2_allowed_mask;
|
||||
static uint64_t ibs_op_ctl2_allowed_mask;
|
||||
static bool ibs_fetch_ctl2_supported;
|
||||
static bool ibs_op_ctl2_supported;
|
||||
|
||||
static uint64_t ibs_fetch_extra_mask;
|
||||
static uint64_t ibs_fetch_ctl2_extra_mask;
|
||||
static uint64_t ibs_op_extra_mask;
|
||||
static uint64_t ibs_op_ctl2_extra_mask;
|
||||
|
||||
SYSCTL_DECL(_kern_hwpmc);
|
||||
|
||||
@@ -70,10 +76,18 @@ SYSCTL_U64(_kern_hwpmc, OID_AUTO, ibs_fetch_extra_mask, CTLFLAG_RDTUN,
|
||||
&ibs_fetch_extra_mask, 0,
|
||||
"Extra allowed bits in the IBS fetch control MSR (override; default 0)");
|
||||
|
||||
SYSCTL_U64(_kern_hwpmc, OID_AUTO, ibs_fetch_ctl2_extra_mask, CTLFLAG_RDTUN,
|
||||
&ibs_fetch_ctl2_extra_mask, 0,
|
||||
"Extra allowed bits in the IBS fetch control 2 MSR (override; default 0)");
|
||||
|
||||
SYSCTL_U64(_kern_hwpmc, OID_AUTO, ibs_op_extra_mask, CTLFLAG_RDTUN,
|
||||
&ibs_op_extra_mask, 0,
|
||||
"Extra allowed bits in the IBS op control MSR (override; default 0)");
|
||||
|
||||
SYSCTL_U64(_kern_hwpmc, OID_AUTO, ibs_op_ctl2_extra_mask, CTLFLAG_RDTUN,
|
||||
&ibs_op_ctl2_extra_mask, 0,
|
||||
"Extra allowed bits in the IBS op control 2 MSR (override; default 0)");
|
||||
|
||||
/*
|
||||
* Per-processor information
|
||||
*/
|
||||
@@ -92,8 +106,10 @@ ibs_init_policy(void)
|
||||
{
|
||||
|
||||
ibs_fetch_allowed_mask = IBS_FETCH_ALLOWED_MASK_BASE;
|
||||
ibs_fetch_ctl2_allowed_mask = 0;
|
||||
|
||||
ibs_op_allowed_mask = IBS_OP_CTL_MAXCNTBASEMASK;
|
||||
ibs_op_ctl2_allowed_mask = 0;
|
||||
|
||||
if ((ibs_features & CPUID_IBSID_ZEN4IBSEXTENSIONS) != 0)
|
||||
ibs_fetch_allowed_mask |= IBS_FETCH_CTL_L3MISSONLY;
|
||||
@@ -106,6 +122,26 @@ ibs_init_policy(void)
|
||||
|
||||
if ((ibs_features & CPUID_IBSID_ZEN4IBSEXTENSIONS) != 0)
|
||||
ibs_op_allowed_mask |= IBS_OP_CTL_L3MISSONLY;
|
||||
|
||||
if ((ibs_features & CPUID_IBSID_FETCHLATFILTERING) != 0)
|
||||
ibs_fetch_ctl2_allowed_mask |= IBS_FETCH_CTL2_LATFILTERMASK;
|
||||
|
||||
if ((ibs_features & CPUID_IBSID_STRMSTANDRMTSOCKET) != 0)
|
||||
ibs_op_ctl2_allowed_mask |= IBS_OP_CTL2_STRMSTFILTER;
|
||||
|
||||
if ((ibs_features & CPUID_IBSID_IBSDIS) != 0) {
|
||||
ibs_fetch_ctl2_supported = true;
|
||||
ibs_op_ctl2_supported = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* ctl2 MSRs only exist on Zen 6; writing them on older silicon
|
||||
* would #GP.
|
||||
*/
|
||||
if (!ibs_fetch_ctl2_supported)
|
||||
ibs_fetch_ctl2_supported = (ibs_fetch_ctl2_allowed_mask != 0);
|
||||
if (!ibs_op_ctl2_supported)
|
||||
ibs_op_ctl2_supported = (ibs_op_ctl2_allowed_mask != 0);
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -128,7 +164,12 @@ ibs_validate_op_config(uint64_t config)
|
||||
if ((config & IBS_OP_CTL_LATFLTEN) != 0) {
|
||||
if ((ibs_features & CPUID_IBSID_IBSLOADLATENCYFILT) == 0)
|
||||
return (EINVAL);
|
||||
if ((config & IBS_OP_CTL_L3MISSONLY) == 0)
|
||||
/*
|
||||
* Zen 6 decouples L3MISSONLY from load-latency filtering
|
||||
* (AMD pub 69205); enforce the pairing only on older parts.
|
||||
*/
|
||||
if ((ibs_features & CPUID_IBSID_IBSDIS) == 0 &&
|
||||
(config & IBS_OP_CTL_L3MISSONLY) == 0)
|
||||
return (EINVAL);
|
||||
|
||||
allowed_mask |= IBS_OP_CTL_LDLATMASK | IBS_OP_CTL_L3MISSONLY;
|
||||
@@ -143,16 +184,67 @@ ibs_validate_op_config(uint64_t config)
|
||||
}
|
||||
|
||||
static int
|
||||
ibs_validate_pmc_config(int ri, uint64_t config)
|
||||
ibs_validate_fetch_ctl2_config(uint64_t config)
|
||||
{
|
||||
uint64_t allowed_mask;
|
||||
|
||||
if (config == 0)
|
||||
return (0);
|
||||
|
||||
if (!ibs_fetch_ctl2_supported)
|
||||
return (EXTERROR(EINVAL,
|
||||
"IBS fetch ctl2 features are not supported on this CPU"));
|
||||
|
||||
allowed_mask = ibs_fetch_ctl2_allowed_mask | ibs_fetch_ctl2_extra_mask;
|
||||
|
||||
if ((config & ~allowed_mask) != 0)
|
||||
return (EXTERROR(EINVAL,
|
||||
"IBS fetch ctl2 config 0x%jx has bits outside allowed"
|
||||
" mask 0x%jx", (uint64_t)config, (uint64_t)allowed_mask));
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
ibs_validate_op_ctl2_config(uint64_t config)
|
||||
{
|
||||
uint64_t allowed_mask;
|
||||
|
||||
if (config == 0)
|
||||
return (0);
|
||||
|
||||
if (!ibs_op_ctl2_supported)
|
||||
return (EXTERROR(EINVAL,
|
||||
"IBS op ctl2 features are not supported on this CPU"));
|
||||
|
||||
allowed_mask = ibs_op_ctl2_allowed_mask | ibs_op_ctl2_extra_mask;
|
||||
|
||||
if ((config & ~allowed_mask) != 0)
|
||||
return (EXTERROR(EINVAL,
|
||||
"IBS op ctl2 config 0x%jx has bits outside allowed mask"
|
||||
" 0x%jx", (uint64_t)config, (uint64_t)allowed_mask));
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
ibs_validate_pmc_config(int ri, uint64_t config, uint64_t config2)
|
||||
{
|
||||
int error;
|
||||
|
||||
switch (ri) {
|
||||
case IBS_PMC_FETCH:
|
||||
return (ibs_validate_fetch_config(config));
|
||||
error = ibs_validate_fetch_config(config);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
return (ibs_validate_fetch_ctl2_config(config2));
|
||||
case IBS_PMC_OP:
|
||||
return (ibs_validate_op_config(config));
|
||||
error = ibs_validate_op_config(config);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
return (ibs_validate_op_ctl2_config(config2));
|
||||
default:
|
||||
return (EINVAL);
|
||||
return (EXTERROR(EINVAL, "invalid IBS PMC index %d", ri));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -266,7 +358,7 @@ static int
|
||||
ibs_allocate_pmc(int cpu __unused, int ri, struct pmc *pm,
|
||||
const struct pmc_op_pmcallocate *a)
|
||||
{
|
||||
uint64_t caps, config;
|
||||
uint64_t caps, config, config2;
|
||||
int error;
|
||||
|
||||
KASSERT(ri >= 0 && ri < IBS_NPMCS,
|
||||
@@ -284,20 +376,53 @@ ibs_allocate_pmc(int cpu __unused, int ri, struct pmc *pm,
|
||||
|
||||
PMCDBG2(MDP, ALL, 1, "ibs-allocate ri=%d caps=0x%x", ri, caps);
|
||||
|
||||
if ((caps & PMC_CAP_SYSTEM) == 0)
|
||||
return (EXTERROR(EINVAL, "IBS requires SYSTEM capability"));
|
||||
if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) != 0) {
|
||||
if ((caps & (PMC_CAP_USER | PMC_CAP_SYSTEM)) == 0)
|
||||
return (EXTERROR(EINVAL,
|
||||
"IBS requires at least USER or SYSTEM capability"));
|
||||
} else {
|
||||
if ((caps & PMC_CAP_SYSTEM) == 0)
|
||||
return (EXTERROR(EINVAL,
|
||||
"IBS requires SYSTEM capability"));
|
||||
if ((caps & PMC_CAP_USER) != 0)
|
||||
return (EXTERROR(EINVAL,
|
||||
"IBS USER filtering requires Zen 6 addr63 support"));
|
||||
}
|
||||
|
||||
if (!PMC_IS_SAMPLING_MODE(a->pm_mode))
|
||||
return (EINVAL);
|
||||
|
||||
config = a->pm_md.pm_ibs.ibs_ctl;
|
||||
error = ibs_validate_pmc_config(ri, config);
|
||||
config2 = a->pm_md.pm_ibs.ibs_ctl2;
|
||||
error = ibs_validate_pmc_config(ri, config, config2);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
pm->pm_md.pm_ibs.ibs_ctl = config;
|
||||
pm->pm_md.pm_ibs.ibs_ctl2 = config2;
|
||||
|
||||
PMCDBG2(MDP, ALL, 2, "ibs-allocate ri=%d -> config=0x%jx", ri,
|
||||
config);
|
||||
if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) != 0) {
|
||||
if ((caps & PMC_CAP_USER) != 0 &&
|
||||
(caps & PMC_CAP_SYSTEM) == 0) {
|
||||
if (ri == IBS_PMC_FETCH)
|
||||
pm->pm_md.pm_ibs.ibs_ctl2 |=
|
||||
IBS_FETCH_CTL2_EXCLADDR63EQ1;
|
||||
else
|
||||
pm->pm_md.pm_ibs.ibs_ctl2 |=
|
||||
IBS_OP_CTL2_EXCLRIP63EQ1;
|
||||
} else if ((caps & PMC_CAP_SYSTEM) != 0 &&
|
||||
(caps & PMC_CAP_USER) == 0) {
|
||||
if (ri == IBS_PMC_FETCH)
|
||||
pm->pm_md.pm_ibs.ibs_ctl2 |=
|
||||
IBS_FETCH_CTL2_EXCLADDR63EQ0;
|
||||
else
|
||||
pm->pm_md.pm_ibs.ibs_ctl2 |=
|
||||
IBS_OP_CTL2_EXCLRIP63EQ0;
|
||||
}
|
||||
}
|
||||
|
||||
PMCDBG3(MDP, ALL, 2,
|
||||
"ibs-allocate ri=%d -> config=0x%jx config2=0x%jx", ri,
|
||||
config, config2);
|
||||
|
||||
return (0);
|
||||
}
|
||||
@@ -349,16 +474,24 @@ ibs_start_pmc(int cpu __diagused, int ri, struct pmc *pm)
|
||||
|
||||
/*
|
||||
* Turn on the ENABLE bit. Zeroing out the control register eliminates
|
||||
* stale valid bits from spurious NMIs and it resets the counter.
|
||||
* stale valid bits from spurious NMIs and it resets the counter. This
|
||||
* is safe here because the counter is not yet enabled; the NMI re-arm
|
||||
* path must not do the same (Family 10h erratum #420).
|
||||
*/
|
||||
switch (ri) {
|
||||
case IBS_PMC_FETCH:
|
||||
wrmsr(IBS_FETCH_CTL, 0);
|
||||
if (ibs_fetch_ctl2_supported)
|
||||
wrmsr(IBS_FETCH_CTL2,
|
||||
pm->pm_md.pm_ibs.ibs_ctl2 & ~IBS_FETCH_CTL2_DISABLE);
|
||||
config = pm->pm_md.pm_ibs.ibs_ctl | IBS_FETCH_CTL_ENABLE;
|
||||
wrmsr(IBS_FETCH_CTL, config);
|
||||
break;
|
||||
case IBS_PMC_OP:
|
||||
wrmsr(IBS_OP_CTL, 0);
|
||||
if (ibs_op_ctl2_supported)
|
||||
wrmsr(IBS_OP_CTL2,
|
||||
pm->pm_md.pm_ibs.ibs_ctl2 & ~IBS_OP_CTL2_DISABLE);
|
||||
config = pm->pm_md.pm_ibs.ibs_ctl | IBS_OP_CTL_ENABLE;
|
||||
wrmsr(IBS_OP_CTL, config);
|
||||
break;
|
||||
@@ -374,7 +507,8 @@ static int
|
||||
ibs_stop_pmc(int cpu __diagused, int ri, struct pmc *pm)
|
||||
{
|
||||
int i;
|
||||
uint64_t config;
|
||||
uint64_t config, config2;
|
||||
bool use_alt_disable;
|
||||
|
||||
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
|
||||
("[ibs,%d] illegal CPU value %d", __LINE__, cpu));
|
||||
@@ -394,23 +528,47 @@ ibs_stop_pmc(int cpu __diagused, int ri, struct pmc *pm)
|
||||
* are stopping and discard spurious NMIs. We then retry clearing the
|
||||
* control register for 50us. This gives us enough time and ensures
|
||||
* that the valid bit is not accidentally stuck after a spurious NMI.
|
||||
*
|
||||
* On Zen 6 with the alternate disable bit (CPUID IbsDis), assert the
|
||||
* ctl2 DISABLE bit first. This avoids an RMW hazard in ctl1 that the
|
||||
* processor may update concurrently while sampling.
|
||||
*/
|
||||
config = pm->pm_md.pm_ibs.ibs_ctl;
|
||||
config2 = pm->pm_md.pm_ibs.ibs_ctl2;
|
||||
use_alt_disable = (ibs_features & CPUID_IBSID_IBSDIS) != 0;
|
||||
|
||||
atomic_store_int(&ibs_pcpu[cpu]->pc_status, IBS_CPU_STOPPING);
|
||||
|
||||
/*
|
||||
* On Zen 6, ctl2 DISABLE is the authoritative stop switch; skip
|
||||
* the legacy ctl1 RMW and clear it directly.
|
||||
*/
|
||||
switch (ri) {
|
||||
case IBS_PMC_FETCH:
|
||||
wrmsr(IBS_FETCH_CTL, config & ~IBS_FETCH_CTL_MAXCNTMASK);
|
||||
DELAY(1);
|
||||
config &= ~IBS_FETCH_CTL_ENABLE;
|
||||
wrmsr(IBS_FETCH_CTL, config);
|
||||
if (use_alt_disable) {
|
||||
wrmsr(IBS_FETCH_CTL2,
|
||||
config2 | IBS_FETCH_CTL2_DISABLE);
|
||||
wrmsr(IBS_FETCH_CTL, config & ~IBS_FETCH_CTL_ENABLE);
|
||||
} else {
|
||||
wrmsr(IBS_FETCH_CTL,
|
||||
config & ~IBS_FETCH_CTL_MAXCNTMASK);
|
||||
DELAY(1);
|
||||
config &= ~IBS_FETCH_CTL_ENABLE;
|
||||
wrmsr(IBS_FETCH_CTL, config);
|
||||
}
|
||||
break;
|
||||
case IBS_PMC_OP:
|
||||
wrmsr(IBS_OP_CTL, config & ~IBS_OP_CTL_MAXCNTMASK);
|
||||
DELAY(1);
|
||||
config &= ~IBS_OP_CTL_ENABLE;
|
||||
wrmsr(IBS_OP_CTL, config);
|
||||
if (use_alt_disable) {
|
||||
wrmsr(IBS_OP_CTL2,
|
||||
config2 | IBS_OP_CTL2_DISABLE);
|
||||
wrmsr(IBS_OP_CTL, config & ~IBS_OP_CTL_ENABLE);
|
||||
} else {
|
||||
wrmsr(IBS_OP_CTL,
|
||||
config & ~IBS_OP_CTL_MAXCNTMASK);
|
||||
DELAY(1);
|
||||
config &= ~IBS_OP_CTL_ENABLE;
|
||||
wrmsr(IBS_OP_CTL, config);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -420,9 +578,13 @@ ibs_stop_pmc(int cpu __diagused, int ri, struct pmc *pm)
|
||||
switch (ri) {
|
||||
case IBS_PMC_FETCH:
|
||||
wrmsr(IBS_FETCH_CTL, 0);
|
||||
if (ibs_fetch_ctl2_supported)
|
||||
wrmsr(IBS_FETCH_CTL2, 0);
|
||||
break;
|
||||
case IBS_PMC_OP:
|
||||
wrmsr(IBS_OP_CTL, 0);
|
||||
if (ibs_op_ctl2_supported)
|
||||
wrmsr(IBS_OP_CTL2, 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -456,6 +618,9 @@ pmc_ibs_process_fetch(struct pmc *pm, struct trapframe *tf, uint64_t config)
|
||||
mpd.pl_mpdata[PMC_MPIDX_FETCH_PHYSADDR] =
|
||||
rdmsr(IBS_FETCH_PHYSADDR);
|
||||
}
|
||||
if (ibs_fetch_ctl2_supported) {
|
||||
mpd.pl_mpdata[PMC_MPIDX_FETCH_CTL2] = rdmsr(IBS_FETCH_CTL2);
|
||||
}
|
||||
|
||||
pmc_process_interrupt_mp(PMC_HR, pm, tf, &mpd);
|
||||
|
||||
@@ -490,6 +655,9 @@ pmc_ibs_process_op(struct pmc *pm, struct trapframe *tf, uint64_t config)
|
||||
if ((ibs_features & CPUID_IBSID_IBSOPDATA4) != 0) {
|
||||
mpd.pl_mpdata[PMC_MPIDX_OP_DATA4] = rdmsr(IBS_OP_DATA4);
|
||||
}
|
||||
if (ibs_op_ctl2_supported) {
|
||||
mpd.pl_mpdata[PMC_MPIDX_OP_CTL2] = rdmsr(IBS_OP_CTL2);
|
||||
}
|
||||
|
||||
pmc_process_interrupt_mp(PMC_HR, pm, tf, &mpd);
|
||||
|
||||
@@ -647,6 +815,10 @@ ibs_pcpu_fini(struct pmc_mdep *md, int cpu)
|
||||
*/
|
||||
wrmsr(IBS_FETCH_CTL, 0);
|
||||
wrmsr(IBS_OP_CTL, 0);
|
||||
if (ibs_fetch_ctl2_supported)
|
||||
wrmsr(IBS_FETCH_CTL2, 0);
|
||||
if (ibs_op_ctl2_supported)
|
||||
wrmsr(IBS_OP_CTL2, 0);
|
||||
|
||||
/*
|
||||
* Free up allocated space.
|
||||
|
||||
@@ -35,6 +35,9 @@
|
||||
/*
|
||||
* All of the CPUID definitions come from AMD PPR Vol 1 for AMD Family 1Ah
|
||||
* Model 02h C1 (57238) 2024-09-29 Revision 0.24.
|
||||
* Zen 6 CPUID bits (IBSDIS, FETCHLATFILTERING, ADDRBIT63FILTERING) come from
|
||||
* AMD64 Architecture Programmer's Manual Volume 2: System Programming (24593)
|
||||
* 2025-07-02 Version 3.43.
|
||||
*/
|
||||
#define CPUID_IBSID 0x8000001B
|
||||
#define CPUID_IBSID_IBSFFV 0x00000001 /* IBS Feature Flags Valid */
|
||||
@@ -50,6 +53,12 @@
|
||||
#define CPUID_IBSID_IBSOPDATA4 0x00000400 /* IBS OP DATA4 */
|
||||
#define CPUID_IBSID_ZEN4IBSEXTENSIONS 0x00000800 /* IBS Zen 4 Extensions */
|
||||
#define CPUID_IBSID_IBSLOADLATENCYFILT 0x00001000 /* Load Latency Filtering */
|
||||
#define CPUID_IBSID_IBSDIS 0x00002000 /* Alternate IBS Disable */
|
||||
#define CPUID_IBSID_FETCHLATFILTERING 0x00004000 /* Fetch Latency Filter */
|
||||
#define CPUID_IBSID_ADDRBIT63FILTERING 0x00008000 /* Addr Bit 63 Filter */
|
||||
#define CPUID_IBSID_STRMSTANDRMTSOCKET 0x00010000 /* StrmSt + RmtSocket */
|
||||
#define CPUID_IBSID_BUFFERV1 0x00020000 /* IBS Buffering V1 */
|
||||
#define CPUID_IBSID_MEMPROFILERV1 0x00040000 /* IBS Memory Profiler V1 */
|
||||
#define CPUID_IBSID_IBSUPDTDDTLBSTATS 0x00080000 /* Simplified DTLB Stats */
|
||||
|
||||
/*
|
||||
@@ -107,11 +116,27 @@
|
||||
#define IBS_FETCH_PHYSADDR 0xC0011032 /* Fetch Physical Address */
|
||||
#define IBS_FETCH_EXTCTL 0xC001103C /* Fetch Control Extended */
|
||||
|
||||
/* IBS Fetch Control 2 (Zen 6) */
|
||||
#define IBS_FETCH_CTL2 0xC001103F /* IBS Fetch Control 2 */
|
||||
#define IBS_FETCH_CTL2_DISABLE (1ULL << 0) /* IBS Fetch Disable */
|
||||
#define IBS_FETCH_CTL2_LATFILTERMASK (0xFULL << 1) /* Fetch Latency Filter */
|
||||
#define IBS_FETCH_CTL2_EXCLADDR63EQ1 (1ULL << 5) /* Exclude addr bit63=1 */
|
||||
#define IBS_FETCH_CTL2_EXCLADDR63EQ0 (1ULL << 6) /* Exclude addr bit63=0 */
|
||||
#define IBS_FETCH_CTL2_ADDR63MASK (IBS_FETCH_CTL2_EXCLADDR63EQ0 | \
|
||||
IBS_FETCH_CTL2_EXCLADDR63EQ1)
|
||||
|
||||
#define IBS_FETCH_CTL2_LAT_MIN 128
|
||||
#define IBS_FETCH_CTL2_LAT_MAX 1920
|
||||
#define IBS_FETCH_CTL2_LAT_STEP 128
|
||||
#define IBS_FETCH_CTL2_LAT_TO_CTL(_l) ((((_l) >> 7) & 0xFULL) << 1)
|
||||
#define IBS_FETCH_CTL2_CTL_TO_LAT(_c) ((((_c) >> 1) & 0xFULL) << 7)
|
||||
|
||||
#define PMC_MPIDX_FETCH_CTL 0
|
||||
#define PMC_MPIDX_FETCH_EXTCTL 1
|
||||
#define PMC_MPIDX_FETCH_LINADDR 2
|
||||
#define PMC_MPIDX_FETCH_PHYSADDR 3
|
||||
#define PMC_MPIDX_FETCH_MAX (PMC_MPIDX_FETCH_PHYSADDR + 1)
|
||||
#define PMC_MPIDX_FETCH_CTL2 4
|
||||
#define PMC_MPIDX_FETCH_MAX (PMC_MPIDX_FETCH_CTL2 + 1)
|
||||
|
||||
/* IBS Execution Control */
|
||||
#define IBS_OP_CTL 0xC0011033 /* IBS Execution Control */
|
||||
@@ -148,6 +173,8 @@
|
||||
#define IBS_OP_DATA_RETURN (1ULL << 34) /* Return */
|
||||
|
||||
#define IBS_OP_DATA2 0xC0011036 /* IBS Op Data 2 */
|
||||
#define IBS_OP_DATA2_RMTSOCKET (1ULL << 9) /* Remote Socket */
|
||||
#define IBS_OP_DATA2_STRMST (1ULL << 8) /* Streaming Store */
|
||||
#define IBS_OP_DATA3 0xC0011037 /* IBS Op Data 3 */
|
||||
#define IBS_OP_DATA3_DCPHYADDRVALID (1ULL << 18) /* DC Physical Address */
|
||||
#define IBS_OP_DATA3_DCLINADDRVALID (1ULL << 17) /* DC Linear Address */
|
||||
@@ -169,6 +196,15 @@
|
||||
#define IBS_OP_DATA4 0xC001103D /* IBS Op Data 4 */
|
||||
#define IBS_OP_DATA4_LDRESYNC (1ULL << 0) /* Load Resync */
|
||||
|
||||
/* IBS Execution Control 2 (Zen 6) */
|
||||
#define IBS_OP_CTL2 0xC001103E /* IBS Execution Control 2 */
|
||||
#define IBS_OP_CTL2_DISABLE (1ULL << 0) /* IBS Execution Disable */
|
||||
#define IBS_OP_CTL2_EXCLRIP63EQ0 (1ULL << 1) /* Exclude RIP bit63=0 */
|
||||
#define IBS_OP_CTL2_EXCLRIP63EQ1 (1ULL << 2) /* Exclude RIP bit63=1 */
|
||||
#define IBS_OP_CTL2_STRMSTFILTER (1ULL << 3) /* Streaming Store Filter */
|
||||
#define IBS_OP_CTL2_RIP63MASK (IBS_OP_CTL2_EXCLRIP63EQ0 | \
|
||||
IBS_OP_CTL2_EXCLRIP63EQ1)
|
||||
|
||||
#define PMC_MPIDX_OP_CTL 0
|
||||
#define PMC_MPIDX_OP_RIP 1
|
||||
#define PMC_MPIDX_OP_DATA 2
|
||||
@@ -178,7 +214,8 @@
|
||||
#define PMC_MPIDX_OP_DC_PHYSADDR 6
|
||||
#define PMC_MPIDX_OP_TGT_RIP 7
|
||||
#define PMC_MPIDX_OP_DATA4 8
|
||||
#define PMC_MPIDX_OP_MAX (PMC_MPIDX_OP_DATA4 + 1)
|
||||
#define PMC_MPIDX_OP_CTL2 9
|
||||
#define PMC_MPIDX_OP_MAX (PMC_MPIDX_OP_CTL2 + 1)
|
||||
|
||||
/*
|
||||
* IBS data is encoded as using the multipart flag in the existing callchain
|
||||
@@ -204,8 +241,8 @@ struct pmc_md_ibs_pmc {
|
||||
uint64_t ibs_ctl2;
|
||||
};
|
||||
|
||||
#define IBS_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_SYSTEM | \
|
||||
PMC_CAP_EDGE | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE)
|
||||
#define IBS_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | \
|
||||
PMC_CAP_SYSTEM | PMC_CAP_EDGE | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE)
|
||||
|
||||
int pmc_ibs_initialize(struct pmc_mdep *md, int ncpu);
|
||||
void pmc_ibs_finalize(struct pmc_mdep *md);
|
||||
|
||||
@@ -371,10 +371,10 @@ pmcstat_pmcindex_to_pmcr(int pmcin)
|
||||
|
||||
#if defined(__amd64__) || defined(__i386__)
|
||||
static void
|
||||
pmcstat_print_ibs_fetch(struct pmclog_ev_callchain *cc, int offset)
|
||||
pmcstat_print_ibs_fetch(struct pmclog_ev_callchain *cc, int offset, int len64)
|
||||
{
|
||||
uint64_t *ibsbuf = (uint64_t *)&cc->pl_pc[offset];
|
||||
uint64_t ctl;
|
||||
uint64_t ctl, ctl2;
|
||||
|
||||
ctl = ibsbuf[PMC_MPIDX_FETCH_CTL];
|
||||
PMCSTAT_PRINT_ENTRY("ibs-fetch", "%s%s%s%s",
|
||||
@@ -390,15 +390,28 @@ pmcstat_print_ibs_fetch(struct pmclog_ev_callchain *cc, int offset)
|
||||
PMCSTAT_PRINT_ENTRY("IBS", "Physical Address %" PRIx64,
|
||||
ibsbuf[PMC_MPIDX_FETCH_PHYSADDR]);
|
||||
}
|
||||
if (len64 > PMC_MPIDX_FETCH_CTL2) {
|
||||
ctl2 = ibsbuf[PMC_MPIDX_FETCH_CTL2];
|
||||
if ((ctl2 & IBS_FETCH_CTL2_EXCLADDR63EQ1) != 0)
|
||||
PMCSTAT_PRINT_ENTRY("ibs-fetch", "addr63=0");
|
||||
if ((ctl2 & IBS_FETCH_CTL2_EXCLADDR63EQ0) != 0)
|
||||
PMCSTAT_PRINT_ENTRY("ibs-fetch", "addr63=1");
|
||||
if ((ctl2 & IBS_FETCH_CTL2_LATFILTERMASK) != 0) {
|
||||
PMCSTAT_PRINT_ENTRY("ibs-fetch",
|
||||
"fetchlat>=%" PRIu64,
|
||||
(uint64_t)IBS_FETCH_CTL2_CTL_TO_LAT(ctl2));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pmcstat_print_ibs_op(struct pmclog_ev_callchain *cc, int offset)
|
||||
pmcstat_print_ibs_op(struct pmclog_ev_callchain *cc, int offset, int len64)
|
||||
{
|
||||
uint64_t *ibsbuf = (uint64_t *)&cc->pl_pc[offset];
|
||||
uint64_t data, data3;
|
||||
uint64_t data, data2, data3, ctl2;
|
||||
|
||||
data = ibsbuf[PMC_MPIDX_OP_DATA];
|
||||
data2 = ibsbuf[PMC_MPIDX_OP_DATA2];
|
||||
data3 = ibsbuf[PMC_MPIDX_OP_DATA3];
|
||||
|
||||
if ((data & IBS_OP_DATA_RIPINVALID) == 0) {
|
||||
@@ -416,6 +429,11 @@ pmcstat_print_ibs_op(struct pmclog_ev_callchain *cc, int offset)
|
||||
(data3 & IBS_OP_DATA3_LOCKEDOP) ? "lock " : "",
|
||||
(data3 & IBS_OP_DATA3_DCL1TLBMISS) ? "l1tlbmiss " : "",
|
||||
(data3 & IBS_OP_DATA3_DCMISS) ? "dcmiss " : "");
|
||||
if ((data2 & (IBS_OP_DATA2_STRMST | IBS_OP_DATA2_RMTSOCKET)) != 0) {
|
||||
PMCSTAT_PRINT_ENTRY("ibs-op", "%s%s",
|
||||
(data2 & IBS_OP_DATA2_STRMST) ? "streamstore " : "",
|
||||
(data2 & IBS_OP_DATA2_RMTSOCKET) ? "remotesocket" : "");
|
||||
}
|
||||
PMCSTAT_PRINT_ENTRY("ibs-op", "Latency %" PRIu64,
|
||||
IBS_OP_DATA3_TO_DCLAT(data3));
|
||||
if ((data3 & IBS_OP_DATA3_DCLINADDRVALID) != 0) {
|
||||
@@ -426,6 +444,15 @@ pmcstat_print_ibs_op(struct pmclog_ev_callchain *cc, int offset)
|
||||
PMCSTAT_PRINT_ENTRY("ibs-op", "Physical Address %" PRIx64,
|
||||
ibsbuf[PMC_MPIDX_OP_DC_PHYSADDR]);
|
||||
}
|
||||
if (len64 > PMC_MPIDX_OP_CTL2) {
|
||||
ctl2 = ibsbuf[PMC_MPIDX_OP_CTL2];
|
||||
if ((ctl2 & IBS_OP_CTL2_EXCLRIP63EQ1) != 0)
|
||||
PMCSTAT_PRINT_ENTRY("ibs-op", "addr63=0");
|
||||
if ((ctl2 & IBS_OP_CTL2_EXCLRIP63EQ0) != 0)
|
||||
PMCSTAT_PRINT_ENTRY("ibs-op", "addr63=1");
|
||||
if ((ctl2 & IBS_OP_CTL2_STRMSTFILTER) != 0)
|
||||
PMCSTAT_PRINT_ENTRY("ibs-op", "streamstore");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -446,9 +473,11 @@ pmcstat_print_multipart(struct pmclog_ev_callchain *cc)
|
||||
return (offset);
|
||||
#if defined(__amd64__) || defined(__i386__)
|
||||
} else if (type == PMC_CC_MULTIPART_IBS_FETCH) {
|
||||
pmcstat_print_ibs_fetch(cc, offset);
|
||||
pmcstat_print_ibs_fetch(cc, offset,
|
||||
len / (sizeof(uint64_t) / sizeof(uintptr_t)));
|
||||
} else if (type == PMC_CC_MULTIPART_IBS_OP) {
|
||||
pmcstat_print_ibs_op(cc, offset);
|
||||
pmcstat_print_ibs_op(cc, offset,
|
||||
len / (sizeof(uint64_t) / sizeof(uintptr_t)));
|
||||
#endif
|
||||
} else {
|
||||
PMCSTAT_PRINT_ENTRY("unsupported multipart type!");
|
||||
|
||||
Reference in New Issue
Block a user