hwpmc_ibs: Add Zen6 IBS ctl2 filters and alternate disable

Add kernel and userland support for Zen6 IBS extensions per AMD pub
69205 (rev 1.00, March 2026): alternate fetch/op disable via ctl2[0],
fetch latency filtering, virtual address bit 63 filtering, and
streaming-store filtering.  Decode the new IbsOpData2 StrmSt and
RmtSocket bits. Update libpmc, pmcstat and manpage.

Pre-Zen6 systems work unchanged with ibs_ctl2 == 0.

Signed-off-by:	Andre Silva <andasilv@amd.com>
Reviewed by:	Ali Mashtizadeh <ali@mashtizadeh.com>, mhorne
Sponsored by:	AMD
Differential Revision:	https://reviews.freebsd.org/D56914
This commit is contained in:
Andre Silva
2026-06-11 11:15:35 -03:00
committed by Mitchell Horne
parent ff46acfd52
commit 0aa4c25f3e
5 changed files with 395 additions and 37 deletions
+53 -2
View File
@@ -700,13 +700,14 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
struct pmc_op_pmcallocate *pmc_config)
{
char *e, *p, *q;
uint64_t ctl, ldlat;
uint64_t ctl, ctl2, ldlat, fetchlat;
u_int ibs_features;
u_int regs[4];
pmc_config->pm_caps |=
(PMC_CAP_SYSTEM | PMC_CAP_EDGE | PMC_CAP_PRECISE);
pmc_config->pm_md.pm_ibs.ibs_ctl = 0;
pmc_config->pm_md.pm_ibs.ibs_ctl2 = 0;
/* setup parsing tables */
switch (pe) {
@@ -735,6 +736,7 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
/* parse parameters */
ctl = 0;
ctl2 = 0;
if (pe == PMC_EV_IBS_FETCH) {
while ((p = strsep(&ctrspec, ",")) != NULL) {
if (KWMATCH(p, "l3miss")) {
@@ -744,6 +746,37 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
ctl |= IBS_FETCH_CTL_L3MISSONLY;
} else if (KWMATCH(p, "randomize")) {
ctl |= IBS_FETCH_CTL_RANDOMIZE;
} else if (KWPREFIXMATCH(p, "fetchlat=")) {
if ((ibs_features & CPUID_IBSID_FETCHLATFILTERING) == 0)
return (-1);
q = strchr(p, '=');
if (*++q == '\0')
return (-1);
fetchlat = strtoull(q, &e, 0);
if (e == q || *e != '\0')
return (-1);
if (fetchlat < IBS_FETCH_CTL2_LAT_MIN ||
fetchlat > IBS_FETCH_CTL2_LAT_MAX)
return (-1);
if ((fetchlat % IBS_FETCH_CTL2_LAT_STEP) != 0)
return (-1);
/* clear prior threshold */
ctl2 &= ~IBS_FETCH_CTL2_LATFILTERMASK;
ctl2 |= IBS_FETCH_CTL2_LAT_TO_CTL(fetchlat);
} else if (KWMATCH(p, "usr")) {
if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) == 0)
return (-1);
pmc_config->pm_caps |= PMC_CAP_USER;
} else if (KWMATCH(p, "os")) {
if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) == 0)
return (-1);
pmc_config->pm_caps |= PMC_CAP_SYSTEM;
} else {
return (-1);
}
@@ -783,6 +816,9 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
*/
if (ldlat < 128 || ldlat > 2048)
return (-1);
/* clear prior ldlat threshold */
ctl &= ~IBS_OP_CTL_LDLATTRSHMASK;
ctl |= IBS_OP_CTL_LDLAT_TO_CTL(ldlat);
ctl |= IBS_OP_CTL_L3MISSONLY | IBS_OP_CTL_LATFLTEN;
} else if (KWMATCH(p, "opcount")) {
@@ -790,6 +826,21 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
return (-1);
ctl |= IBS_OP_CTL_COUNTERCONTROL;
} else if (KWMATCH(p, "usr")) {
if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) == 0)
return (-1);
pmc_config->pm_caps |= PMC_CAP_USER;
} else if (KWMATCH(p, "os")) {
if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) == 0)
return (-1);
pmc_config->pm_caps |= PMC_CAP_SYSTEM;
} else if (KWMATCH(p, "streamstore")) {
if ((ibs_features & CPUID_IBSID_STRMSTANDRMTSOCKET) == 0)
return (-1);
ctl2 |= IBS_OP_CTL2_STRMSTFILTER;
} else {
return (-1);
}
@@ -806,8 +857,8 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
ctl |= IBS_OP_INTERVAL_TO_CTL(pmc_config->pm_count);
}
pmc_config->pm_md.pm_ibs.ibs_ctl |= ctl;
pmc_config->pm_md.pm_ibs.ibs_ctl2 |= ctl2;
return (0);
}
+73 -4
View File
@@ -89,7 +89,7 @@ AMD IBS supports the following capabilities.
.It PMC_CAP_SYSTEM Ta Yes
.It PMC_CAP_TAGGING Ta \&No
.It PMC_CAP_THRESHOLD Ta \&No
.It PMC_CAP_USER Ta \&No
.It PMC_CAP_USER Ta Yes (Zen 6)
.It PMC_CAP_WRITE Ta \&No
.El
.Pp
@@ -97,25 +97,91 @@ By default AMD IBS enables the edge, interrupt, system and precise flags.
.Ss Event Qualifiers
Event specifiers for AMD IBS can have the following optional
qualifiers:
.Bl -tag -width "ldlat=value"
.Bl -tag -width "fetchlat=value"
.It Li usr
Valid for both
.Ar ibs-fetch
and
.Ar ibs-op
events.
Configure the counter to only sample user-mode events.
Requires Zen 6 IBS extensions
.Pq CPUID Fn8000_001B Va EAX[IbsAddrBit63Filtering] ,
and is rejected when the CPU does not advertise support.
.It Li os
Valid for both
.Ar ibs-fetch
and
.Ar ibs-op
events.
Configure the counter to only sample kernel-mode events.
Requires Zen 6 IBS extensions
.Pq CPUID Fn8000_001B Va EAX[IbsAddrBit63Filtering] ,
and is rejected when the CPU does not advertise support.
.It Li fetchlat= Ns Ar value
Valid only for
.Ar ibs-fetch
events.
Configure the counter to only sample fetches whose latency is greater than or
equal to
.Ar value
core clock cycles.
The valid range is 128 to 1920 in steps of 128.
Requires Zen 6 IBS extensions
.Pq CPUID Fn8000_001B Va EAX[IbsFetchLatencyFiltering] ,
and is rejected when the CPU does not advertise support.
.It Li l3miss
Valid for both
.Ar ibs-fetch
and
.Ar ibs-op
events.
Configure IBS to only sample if an l3miss occurred.
.It Li ldlat= Ns Ar value
Valid only for
.Ar ibs-op
events.
Configure the counter to only sample events with load latencies above
.Ar value .
IBS only supports filtering latencies that are a multiple of 128 and between
128 and 2048.
Load latency filtering can only be used with ibs-op events and imply the
l3miss qualifier.
On pre-Zen 6 hardware this qualifier implies the
.Li l3miss
qualifier; on Zen 6 and later, latency-only filtering without
.Li l3miss
is permitted.
.It Li opcount
Valid only for
.Ar ibs-op
events.
Count ops rather than cycles.
.It Li randomize
Valid only for
.Ar ibs-fetch
events.
Randomize the sampling rate.
.It Li streamstore
Valid only for
.Ar ibs-op
events.
Configure the counter to only sample streaming
.Pq non-temporal
store operations.
Requires Zen 6 IBS extensions
.Pq CPUID Fn8000_001B Va EAX[IbsStrmStAndRmtSocket] ,
and is rejected when the CPU does not advertise support.
.El
.Ss AMD IBS Events Specifiers
The IBS event class provides only two event specifiers:
.Bl -tag -width indent
.It Li ibs-fetch Xo
.Op ,usr
.Op ,os
.Op ,fetchlat= Ns Ar value
.Op ,l3miss
.Op ,randomize
.Xc
@@ -124,9 +190,12 @@ The
.Ar randomize
qualifier randomly sets the bottom four bits of the sample rate.
.It Li ibs-op Xo
.Op ,usr
.Op ,os
.Op ,l3miss
.Op ,ldlat= Ns Ar ldlat
.Op ,opcount
.Op ,streamstore
.Xc
Collect performance samples during instruction execution.
The
+193 -21
View File
@@ -60,9 +60,15 @@ struct ibs_descr {
static uint64_t ibs_features;
static uint64_t ibs_fetch_allowed_mask;
static uint64_t ibs_op_allowed_mask;
static uint64_t ibs_fetch_ctl2_allowed_mask;
static uint64_t ibs_op_ctl2_allowed_mask;
static bool ibs_fetch_ctl2_supported;
static bool ibs_op_ctl2_supported;
static uint64_t ibs_fetch_extra_mask;
static uint64_t ibs_fetch_ctl2_extra_mask;
static uint64_t ibs_op_extra_mask;
static uint64_t ibs_op_ctl2_extra_mask;
SYSCTL_DECL(_kern_hwpmc);
@@ -70,10 +76,18 @@ SYSCTL_U64(_kern_hwpmc, OID_AUTO, ibs_fetch_extra_mask, CTLFLAG_RDTUN,
&ibs_fetch_extra_mask, 0,
"Extra allowed bits in the IBS fetch control MSR (override; default 0)");
SYSCTL_U64(_kern_hwpmc, OID_AUTO, ibs_fetch_ctl2_extra_mask, CTLFLAG_RDTUN,
&ibs_fetch_ctl2_extra_mask, 0,
"Extra allowed bits in the IBS fetch control 2 MSR (override; default 0)");
SYSCTL_U64(_kern_hwpmc, OID_AUTO, ibs_op_extra_mask, CTLFLAG_RDTUN,
&ibs_op_extra_mask, 0,
"Extra allowed bits in the IBS op control MSR (override; default 0)");
SYSCTL_U64(_kern_hwpmc, OID_AUTO, ibs_op_ctl2_extra_mask, CTLFLAG_RDTUN,
&ibs_op_ctl2_extra_mask, 0,
"Extra allowed bits in the IBS op control 2 MSR (override; default 0)");
/*
* Per-processor information
*/
@@ -92,8 +106,10 @@ ibs_init_policy(void)
{
ibs_fetch_allowed_mask = IBS_FETCH_ALLOWED_MASK_BASE;
ibs_fetch_ctl2_allowed_mask = 0;
ibs_op_allowed_mask = IBS_OP_CTL_MAXCNTBASEMASK;
ibs_op_ctl2_allowed_mask = 0;
if ((ibs_features & CPUID_IBSID_ZEN4IBSEXTENSIONS) != 0)
ibs_fetch_allowed_mask |= IBS_FETCH_CTL_L3MISSONLY;
@@ -106,6 +122,26 @@ ibs_init_policy(void)
if ((ibs_features & CPUID_IBSID_ZEN4IBSEXTENSIONS) != 0)
ibs_op_allowed_mask |= IBS_OP_CTL_L3MISSONLY;
if ((ibs_features & CPUID_IBSID_FETCHLATFILTERING) != 0)
ibs_fetch_ctl2_allowed_mask |= IBS_FETCH_CTL2_LATFILTERMASK;
if ((ibs_features & CPUID_IBSID_STRMSTANDRMTSOCKET) != 0)
ibs_op_ctl2_allowed_mask |= IBS_OP_CTL2_STRMSTFILTER;
if ((ibs_features & CPUID_IBSID_IBSDIS) != 0) {
ibs_fetch_ctl2_supported = true;
ibs_op_ctl2_supported = true;
}
/*
* ctl2 MSRs only exist on Zen 6; writing them on older silicon
* would #GP.
*/
if (!ibs_fetch_ctl2_supported)
ibs_fetch_ctl2_supported = (ibs_fetch_ctl2_allowed_mask != 0);
if (!ibs_op_ctl2_supported)
ibs_op_ctl2_supported = (ibs_op_ctl2_allowed_mask != 0);
}
static int
@@ -128,7 +164,12 @@ ibs_validate_op_config(uint64_t config)
if ((config & IBS_OP_CTL_LATFLTEN) != 0) {
if ((ibs_features & CPUID_IBSID_IBSLOADLATENCYFILT) == 0)
return (EINVAL);
if ((config & IBS_OP_CTL_L3MISSONLY) == 0)
/*
* Zen 6 decouples L3MISSONLY from load-latency filtering
* (AMD pub 69205); enforce the pairing only on older parts.
*/
if ((ibs_features & CPUID_IBSID_IBSDIS) == 0 &&
(config & IBS_OP_CTL_L3MISSONLY) == 0)
return (EINVAL);
allowed_mask |= IBS_OP_CTL_LDLATMASK | IBS_OP_CTL_L3MISSONLY;
@@ -143,16 +184,67 @@ ibs_validate_op_config(uint64_t config)
}
static int
ibs_validate_pmc_config(int ri, uint64_t config)
ibs_validate_fetch_ctl2_config(uint64_t config)
{
	uint64_t allowed_mask;

	/*
	 * Validate the IBS fetch ctl2 value requested for a PMC.
	 *
	 * A zero value requests no ctl2 feature and is always accepted.  Any
	 * other value fails through EXTERROR(EINVAL, ...) when fetch ctl2 is
	 * not supported, or when it sets a bit outside
	 * ibs_fetch_ctl2_allowed_mask | ibs_fetch_ctl2_extra_mask.
	 */
	if (config == 0)
		return (0);

	if (!ibs_fetch_ctl2_supported)
		return (EXTERROR(EINVAL,
		    "IBS fetch ctl2 features are not supported on this CPU"));

	allowed_mask = ibs_fetch_ctl2_allowed_mask | ibs_fetch_ctl2_extra_mask;
	if ((config & ~allowed_mask) != 0)
		/* %jx takes a uintmax_t argument. */
		return (EXTERROR(EINVAL,
		    "IBS fetch ctl2 config 0x%jx has bits outside allowed"
		    " mask 0x%jx", (uintmax_t)config, (uintmax_t)allowed_mask));

	return (0);
}
/*
 * Validate the IBS op ctl2 value requested for a PMC.
 *
 * A zero value requests no ctl2 feature and is always accepted.  Any other
 * value fails through EXTERROR(EINVAL, ...) when op ctl2 is not supported,
 * or when it sets a bit outside
 * ibs_op_ctl2_allowed_mask | ibs_op_ctl2_extra_mask.  Returns 0 otherwise.
 */
static int
ibs_validate_op_ctl2_config(uint64_t config)
{
	uint64_t allowed_mask;

	if (config == 0)
		return (0);

	if (!ibs_op_ctl2_supported)
		return (EXTERROR(EINVAL,
		    "IBS op ctl2 features are not supported on this CPU"));

	allowed_mask = ibs_op_ctl2_allowed_mask | ibs_op_ctl2_extra_mask;
	if ((config & ~allowed_mask) != 0)
		/* %jx takes a uintmax_t argument. */
		return (EXTERROR(EINVAL,
		    "IBS op ctl2 config 0x%jx has bits outside allowed mask"
		    " 0x%jx", (uintmax_t)config, (uintmax_t)allowed_mask));

	return (0);
}
static int
ibs_validate_pmc_config(int ri, uint64_t config, uint64_t config2)
{
int error;
switch (ri) {
case IBS_PMC_FETCH:
return (ibs_validate_fetch_config(config));
error = ibs_validate_fetch_config(config);
if (error != 0)
return (error);
return (ibs_validate_fetch_ctl2_config(config2));
case IBS_PMC_OP:
return (ibs_validate_op_config(config));
error = ibs_validate_op_config(config);
if (error != 0)
return (error);
return (ibs_validate_op_ctl2_config(config2));
default:
return (EINVAL);
return (EXTERROR(EINVAL, "invalid IBS PMC index %d", ri));
}
}
@@ -266,7 +358,7 @@ static int
ibs_allocate_pmc(int cpu __unused, int ri, struct pmc *pm,
const struct pmc_op_pmcallocate *a)
{
uint64_t caps, config;
uint64_t caps, config, config2;
int error;
KASSERT(ri >= 0 && ri < IBS_NPMCS,
@@ -284,20 +376,53 @@ ibs_allocate_pmc(int cpu __unused, int ri, struct pmc *pm,
PMCDBG2(MDP, ALL, 1, "ibs-allocate ri=%d caps=0x%x", ri, caps);
if ((caps & PMC_CAP_SYSTEM) == 0)
return (EXTERROR(EINVAL, "IBS requires SYSTEM capability"));
if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) != 0) {
if ((caps & (PMC_CAP_USER | PMC_CAP_SYSTEM)) == 0)
return (EXTERROR(EINVAL,
"IBS requires at least USER or SYSTEM capability"));
} else {
if ((caps & PMC_CAP_SYSTEM) == 0)
return (EXTERROR(EINVAL,
"IBS requires SYSTEM capability"));
if ((caps & PMC_CAP_USER) != 0)
return (EXTERROR(EINVAL,
"IBS USER filtering requires Zen 6 addr63 support"));
}
if (!PMC_IS_SAMPLING_MODE(a->pm_mode))
return (EINVAL);
config = a->pm_md.pm_ibs.ibs_ctl;
error = ibs_validate_pmc_config(ri, config);
config2 = a->pm_md.pm_ibs.ibs_ctl2;
error = ibs_validate_pmc_config(ri, config, config2);
if (error != 0)
return (error);
pm->pm_md.pm_ibs.ibs_ctl = config;
pm->pm_md.pm_ibs.ibs_ctl2 = config2;
PMCDBG2(MDP, ALL, 2, "ibs-allocate ri=%d -> config=0x%jx", ri,
config);
if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) != 0) {
if ((caps & PMC_CAP_USER) != 0 &&
(caps & PMC_CAP_SYSTEM) == 0) {
if (ri == IBS_PMC_FETCH)
pm->pm_md.pm_ibs.ibs_ctl2 |=
IBS_FETCH_CTL2_EXCLADDR63EQ1;
else
pm->pm_md.pm_ibs.ibs_ctl2 |=
IBS_OP_CTL2_EXCLRIP63EQ1;
} else if ((caps & PMC_CAP_SYSTEM) != 0 &&
(caps & PMC_CAP_USER) == 0) {
if (ri == IBS_PMC_FETCH)
pm->pm_md.pm_ibs.ibs_ctl2 |=
IBS_FETCH_CTL2_EXCLADDR63EQ0;
else
pm->pm_md.pm_ibs.ibs_ctl2 |=
IBS_OP_CTL2_EXCLRIP63EQ0;
}
}
PMCDBG3(MDP, ALL, 2,
"ibs-allocate ri=%d -> config=0x%jx config2=0x%jx", ri,
config, config2);
return (0);
}
@@ -349,16 +474,24 @@ ibs_start_pmc(int cpu __diagused, int ri, struct pmc *pm)
/*
* Turn on the ENABLE bit. Zeroing out the control register eliminates
* stale valid bits from spurious NMIs and it resets the counter.
* stale valid bits from spurious NMIs and it resets the counter. This
* is safe here because the counter is not yet enabled; the NMI re-arm
* path must not do the same (Family 10h erratum #420).
*/
switch (ri) {
case IBS_PMC_FETCH:
wrmsr(IBS_FETCH_CTL, 0);
if (ibs_fetch_ctl2_supported)
wrmsr(IBS_FETCH_CTL2,
pm->pm_md.pm_ibs.ibs_ctl2 & ~IBS_FETCH_CTL2_DISABLE);
config = pm->pm_md.pm_ibs.ibs_ctl | IBS_FETCH_CTL_ENABLE;
wrmsr(IBS_FETCH_CTL, config);
break;
case IBS_PMC_OP:
wrmsr(IBS_OP_CTL, 0);
if (ibs_op_ctl2_supported)
wrmsr(IBS_OP_CTL2,
pm->pm_md.pm_ibs.ibs_ctl2 & ~IBS_OP_CTL2_DISABLE);
config = pm->pm_md.pm_ibs.ibs_ctl | IBS_OP_CTL_ENABLE;
wrmsr(IBS_OP_CTL, config);
break;
@@ -374,7 +507,8 @@ static int
ibs_stop_pmc(int cpu __diagused, int ri, struct pmc *pm)
{
int i;
uint64_t config;
uint64_t config, config2;
bool use_alt_disable;
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
("[ibs,%d] illegal CPU value %d", __LINE__, cpu));
@@ -394,23 +528,47 @@ ibs_stop_pmc(int cpu __diagused, int ri, struct pmc *pm)
* are stopping and discard spurious NMIs. We then retry clearing the
* control register for 50us. This gives us enough time and ensures
* that the valid bit is not accidentally stuck after a spurious NMI.
*
* On Zen 6 with the alternate disable bit (CPUID IbsDis), assert the
* ctl2 DISABLE bit first. This avoids an RMW hazard in ctl1 that the
* processor may update concurrently while sampling.
*/
config = pm->pm_md.pm_ibs.ibs_ctl;
config2 = pm->pm_md.pm_ibs.ibs_ctl2;
use_alt_disable = (ibs_features & CPUID_IBSID_IBSDIS) != 0;
atomic_store_int(&ibs_pcpu[cpu]->pc_status, IBS_CPU_STOPPING);
/*
* On Zen 6, ctl2 DISABLE is the authoritative stop switch: set it first,
* then clear the ctl1 ENABLE bit directly instead of using the legacy
* two-step ctl1 sequence.
*/
switch (ri) {
case IBS_PMC_FETCH:
wrmsr(IBS_FETCH_CTL, config & ~IBS_FETCH_CTL_MAXCNTMASK);
DELAY(1);
config &= ~IBS_FETCH_CTL_ENABLE;
wrmsr(IBS_FETCH_CTL, config);
if (use_alt_disable) {
wrmsr(IBS_FETCH_CTL2,
config2 | IBS_FETCH_CTL2_DISABLE);
wrmsr(IBS_FETCH_CTL, config & ~IBS_FETCH_CTL_ENABLE);
} else {
wrmsr(IBS_FETCH_CTL,
config & ~IBS_FETCH_CTL_MAXCNTMASK);
DELAY(1);
config &= ~IBS_FETCH_CTL_ENABLE;
wrmsr(IBS_FETCH_CTL, config);
}
break;
case IBS_PMC_OP:
wrmsr(IBS_OP_CTL, config & ~IBS_OP_CTL_MAXCNTMASK);
DELAY(1);
config &= ~IBS_OP_CTL_ENABLE;
wrmsr(IBS_OP_CTL, config);
if (use_alt_disable) {
wrmsr(IBS_OP_CTL2,
config2 | IBS_OP_CTL2_DISABLE);
wrmsr(IBS_OP_CTL, config & ~IBS_OP_CTL_ENABLE);
} else {
wrmsr(IBS_OP_CTL,
config & ~IBS_OP_CTL_MAXCNTMASK);
DELAY(1);
config &= ~IBS_OP_CTL_ENABLE;
wrmsr(IBS_OP_CTL, config);
}
break;
}
@@ -420,9 +578,13 @@ ibs_stop_pmc(int cpu __diagused, int ri, struct pmc *pm)
switch (ri) {
case IBS_PMC_FETCH:
wrmsr(IBS_FETCH_CTL, 0);
if (ibs_fetch_ctl2_supported)
wrmsr(IBS_FETCH_CTL2, 0);
break;
case IBS_PMC_OP:
wrmsr(IBS_OP_CTL, 0);
if (ibs_op_ctl2_supported)
wrmsr(IBS_OP_CTL2, 0);
break;
}
}
@@ -456,6 +618,9 @@ pmc_ibs_process_fetch(struct pmc *pm, struct trapframe *tf, uint64_t config)
mpd.pl_mpdata[PMC_MPIDX_FETCH_PHYSADDR] =
rdmsr(IBS_FETCH_PHYSADDR);
}
if (ibs_fetch_ctl2_supported) {
mpd.pl_mpdata[PMC_MPIDX_FETCH_CTL2] = rdmsr(IBS_FETCH_CTL2);
}
pmc_process_interrupt_mp(PMC_HR, pm, tf, &mpd);
@@ -490,6 +655,9 @@ pmc_ibs_process_op(struct pmc *pm, struct trapframe *tf, uint64_t config)
if ((ibs_features & CPUID_IBSID_IBSOPDATA4) != 0) {
mpd.pl_mpdata[PMC_MPIDX_OP_DATA4] = rdmsr(IBS_OP_DATA4);
}
if (ibs_op_ctl2_supported) {
mpd.pl_mpdata[PMC_MPIDX_OP_CTL2] = rdmsr(IBS_OP_CTL2);
}
pmc_process_interrupt_mp(PMC_HR, pm, tf, &mpd);
@@ -647,6 +815,10 @@ ibs_pcpu_fini(struct pmc_mdep *md, int cpu)
*/
wrmsr(IBS_FETCH_CTL, 0);
wrmsr(IBS_OP_CTL, 0);
if (ibs_fetch_ctl2_supported)
wrmsr(IBS_FETCH_CTL2, 0);
if (ibs_op_ctl2_supported)
wrmsr(IBS_OP_CTL2, 0);
/*
* Free up allocated space.
+41 -4
View File
@@ -35,6 +35,9 @@
/*
* All of the CPUID definitions come from AMD PPR Vol 1 for AMD Family 1Ah
* Model 02h C1 (57238) 2024-09-29 Revision 0.24.
* Zen 6 CPUID bits (IBSDIS, FETCHLATFILTERING, ADDRBIT63FILTERING) come from
* AMD64 Architecture Programmer's Manual Volume 2: System Programming (24593)
* 2025-07-02 Version 3.43.
*/
#define CPUID_IBSID 0x8000001B
#define CPUID_IBSID_IBSFFV 0x00000001 /* IBS Feature Flags Valid */
@@ -50,6 +53,12 @@
#define CPUID_IBSID_IBSOPDATA4 0x00000400 /* IBS OP DATA4 */
#define CPUID_IBSID_ZEN4IBSEXTENSIONS 0x00000800 /* IBS Zen 4 Extensions */
#define CPUID_IBSID_IBSLOADLATENCYFILT 0x00001000 /* Load Latency Filtering */
#define CPUID_IBSID_IBSDIS 0x00002000 /* Alternate IBS Disable */
#define CPUID_IBSID_FETCHLATFILTERING 0x00004000 /* Fetch Latency Filter */
#define CPUID_IBSID_ADDRBIT63FILTERING 0x00008000 /* Addr Bit 63 Filter */
#define CPUID_IBSID_STRMSTANDRMTSOCKET 0x00010000 /* StrmSt + RmtSocket */
#define CPUID_IBSID_BUFFERV1 0x00020000 /* IBS Buffering V1 */
#define CPUID_IBSID_MEMPROFILERV1 0x00040000 /* IBS Memory Profiler V1 */
#define CPUID_IBSID_IBSUPDTDDTLBSTATS 0x00080000 /* Simplified DTLB Stats */
/*
@@ -107,11 +116,27 @@
#define IBS_FETCH_PHYSADDR 0xC0011032 /* Fetch Physical Address */
#define IBS_FETCH_EXTCTL 0xC001103C /* Fetch Control Extended */
/* IBS Fetch Control 2 (Zen 6) */
#define IBS_FETCH_CTL2 0xC001103F /* IBS Fetch Control 2 */
#define IBS_FETCH_CTL2_DISABLE (1ULL << 0) /* IBS Fetch Disable */
#define IBS_FETCH_CTL2_LATFILTERMASK (0xFULL << 1) /* Fetch Latency Filter */
#define IBS_FETCH_CTL2_EXCLADDR63EQ1 (1ULL << 5) /* Exclude addr bit63=1 */
#define IBS_FETCH_CTL2_EXCLADDR63EQ0 (1ULL << 6) /* Exclude addr bit63=0 */
#define IBS_FETCH_CTL2_ADDR63MASK (IBS_FETCH_CTL2_EXCLADDR63EQ0 | \
IBS_FETCH_CTL2_EXCLADDR63EQ1)
#define IBS_FETCH_CTL2_LAT_MIN 128
#define IBS_FETCH_CTL2_LAT_MAX 1920
#define IBS_FETCH_CTL2_LAT_STEP 128
#define IBS_FETCH_CTL2_LAT_TO_CTL(_l) ((((_l) >> 7) & 0xFULL) << 1)
#define IBS_FETCH_CTL2_CTL_TO_LAT(_c) ((((_c) >> 1) & 0xFULL) << 7)
#define PMC_MPIDX_FETCH_CTL 0
#define PMC_MPIDX_FETCH_EXTCTL 1
#define PMC_MPIDX_FETCH_LINADDR 2
#define PMC_MPIDX_FETCH_PHYSADDR 3
#define PMC_MPIDX_FETCH_MAX (PMC_MPIDX_FETCH_PHYSADDR + 1)
#define PMC_MPIDX_FETCH_CTL2 4
#define PMC_MPIDX_FETCH_MAX (PMC_MPIDX_FETCH_CTL2 + 1)
/* IBS Execution Control */
#define IBS_OP_CTL 0xC0011033 /* IBS Execution Control */
@@ -148,6 +173,8 @@
#define IBS_OP_DATA_RETURN (1ULL << 34) /* Return */
#define IBS_OP_DATA2 0xC0011036 /* IBS Op Data 2 */
#define IBS_OP_DATA2_RMTSOCKET (1ULL << 9) /* Remote Socket */
#define IBS_OP_DATA2_STRMST (1ULL << 8) /* Streaming Store */
#define IBS_OP_DATA3 0xC0011037 /* IBS Op Data 3 */
#define IBS_OP_DATA3_DCPHYADDRVALID (1ULL << 18) /* DC Physical Address */
#define IBS_OP_DATA3_DCLINADDRVALID (1ULL << 17) /* DC Linear Address */
@@ -169,6 +196,15 @@
#define IBS_OP_DATA4 0xC001103D /* IBS Op Data 4 */
#define IBS_OP_DATA4_LDRESYNC (1ULL << 0) /* Load Resync */
/* IBS Execution Control 2 (Zen 6) */
#define IBS_OP_CTL2 0xC001103E /* IBS Execution Control 2 */
#define IBS_OP_CTL2_DISABLE (1ULL << 0) /* IBS Execution Disable */
#define IBS_OP_CTL2_EXCLRIP63EQ0 (1ULL << 1) /* Exclude RIP bit63=0 */
#define IBS_OP_CTL2_EXCLRIP63EQ1 (1ULL << 2) /* Exclude RIP bit63=1 */
#define IBS_OP_CTL2_STRMSTFILTER (1ULL << 3) /* Streaming Store Filter */
#define IBS_OP_CTL2_RIP63MASK (IBS_OP_CTL2_EXCLRIP63EQ0 | \
IBS_OP_CTL2_EXCLRIP63EQ1)
#define PMC_MPIDX_OP_CTL 0
#define PMC_MPIDX_OP_RIP 1
#define PMC_MPIDX_OP_DATA 2
@@ -178,7 +214,8 @@
#define PMC_MPIDX_OP_DC_PHYSADDR 6
#define PMC_MPIDX_OP_TGT_RIP 7
#define PMC_MPIDX_OP_DATA4 8
#define PMC_MPIDX_OP_MAX (PMC_MPIDX_OP_DATA4 + 1)
#define PMC_MPIDX_OP_CTL2 9
#define PMC_MPIDX_OP_MAX (PMC_MPIDX_OP_CTL2 + 1)
/*
* IBS data is encoded as using the multipart flag in the existing callchain
@@ -204,8 +241,8 @@ struct pmc_md_ibs_pmc {
uint64_t ibs_ctl2;
};
#define IBS_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_SYSTEM | \
PMC_CAP_EDGE | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE)
#define IBS_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | \
PMC_CAP_SYSTEM | PMC_CAP_EDGE | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE)
int pmc_ibs_initialize(struct pmc_mdep *md, int ncpu);
void pmc_ibs_finalize(struct pmc_mdep *md);
+35 -6
View File
@@ -371,10 +371,10 @@ pmcstat_pmcindex_to_pmcr(int pmcin)
#if defined(__amd64__) || defined(__i386__)
static void
pmcstat_print_ibs_fetch(struct pmclog_ev_callchain *cc, int offset)
pmcstat_print_ibs_fetch(struct pmclog_ev_callchain *cc, int offset, int len64)
{
uint64_t *ibsbuf = (uint64_t *)&cc->pl_pc[offset];
uint64_t ctl;
uint64_t ctl, ctl2;
ctl = ibsbuf[PMC_MPIDX_FETCH_CTL];
PMCSTAT_PRINT_ENTRY("ibs-fetch", "%s%s%s%s",
@@ -390,15 +390,28 @@ pmcstat_print_ibs_fetch(struct pmclog_ev_callchain *cc, int offset)
PMCSTAT_PRINT_ENTRY("IBS", "Physical Address %" PRIx64,
ibsbuf[PMC_MPIDX_FETCH_PHYSADDR]);
}
if (len64 > PMC_MPIDX_FETCH_CTL2) {
ctl2 = ibsbuf[PMC_MPIDX_FETCH_CTL2];
if ((ctl2 & IBS_FETCH_CTL2_EXCLADDR63EQ1) != 0)
PMCSTAT_PRINT_ENTRY("ibs-fetch", "addr63=0");
if ((ctl2 & IBS_FETCH_CTL2_EXCLADDR63EQ0) != 0)
PMCSTAT_PRINT_ENTRY("ibs-fetch", "addr63=1");
if ((ctl2 & IBS_FETCH_CTL2_LATFILTERMASK) != 0) {
PMCSTAT_PRINT_ENTRY("ibs-fetch",
"fetchlat>=%" PRIu64,
(uint64_t)IBS_FETCH_CTL2_CTL_TO_LAT(ctl2));
}
}
}
static void
pmcstat_print_ibs_op(struct pmclog_ev_callchain *cc, int offset)
pmcstat_print_ibs_op(struct pmclog_ev_callchain *cc, int offset, int len64)
{
uint64_t *ibsbuf = (uint64_t *)&cc->pl_pc[offset];
uint64_t data, data3;
uint64_t data, data2, data3, ctl2;
data = ibsbuf[PMC_MPIDX_OP_DATA];
data2 = ibsbuf[PMC_MPIDX_OP_DATA2];
data3 = ibsbuf[PMC_MPIDX_OP_DATA3];
if ((data & IBS_OP_DATA_RIPINVALID) == 0) {
@@ -416,6 +429,11 @@ pmcstat_print_ibs_op(struct pmclog_ev_callchain *cc, int offset)
(data3 & IBS_OP_DATA3_LOCKEDOP) ? "lock " : "",
(data3 & IBS_OP_DATA3_DCL1TLBMISS) ? "l1tlbmiss " : "",
(data3 & IBS_OP_DATA3_DCMISS) ? "dcmiss " : "");
if ((data2 & (IBS_OP_DATA2_STRMST | IBS_OP_DATA2_RMTSOCKET)) != 0) {
PMCSTAT_PRINT_ENTRY("ibs-op", "%s%s",
(data2 & IBS_OP_DATA2_STRMST) ? "streamstore " : "",
(data2 & IBS_OP_DATA2_RMTSOCKET) ? "remotesocket" : "");
}
PMCSTAT_PRINT_ENTRY("ibs-op", "Latency %" PRIu64,
IBS_OP_DATA3_TO_DCLAT(data3));
if ((data3 & IBS_OP_DATA3_DCLINADDRVALID) != 0) {
@@ -426,6 +444,15 @@ pmcstat_print_ibs_op(struct pmclog_ev_callchain *cc, int offset)
PMCSTAT_PRINT_ENTRY("ibs-op", "Physical Address %" PRIx64,
ibsbuf[PMC_MPIDX_OP_DC_PHYSADDR]);
}
if (len64 > PMC_MPIDX_OP_CTL2) {
ctl2 = ibsbuf[PMC_MPIDX_OP_CTL2];
if ((ctl2 & IBS_OP_CTL2_EXCLRIP63EQ1) != 0)
PMCSTAT_PRINT_ENTRY("ibs-op", "addr63=0");
if ((ctl2 & IBS_OP_CTL2_EXCLRIP63EQ0) != 0)
PMCSTAT_PRINT_ENTRY("ibs-op", "addr63=1");
if ((ctl2 & IBS_OP_CTL2_STRMSTFILTER) != 0)
PMCSTAT_PRINT_ENTRY("ibs-op", "streamstore");
}
}
#endif
@@ -446,9 +473,11 @@ pmcstat_print_multipart(struct pmclog_ev_callchain *cc)
return (offset);
#if defined(__amd64__) || defined(__i386__)
} else if (type == PMC_CC_MULTIPART_IBS_FETCH) {
pmcstat_print_ibs_fetch(cc, offset);
pmcstat_print_ibs_fetch(cc, offset,
len / (sizeof(uint64_t) / sizeof(uintptr_t)));
} else if (type == PMC_CC_MULTIPART_IBS_OP) {
pmcstat_print_ibs_op(cc, offset);
pmcstat_print_ibs_op(cc, offset,
len / (sizeof(uint64_t) / sizeof(uintptr_t)));
#endif
} else {
PMCSTAT_PRINT_ENTRY("unsupported multipart type!");