gve: Use load-acquire to fetch generation bits

When running the driver using the DQO queue format, we must load the
generation bit and check it before possibly reading the rest of the
descriptor's fields.

Previously, we guarded against reordering of reads using an explicit
thread fence. This commit changes the thread fence to a load with
acquire semantics. Because the tx and rx generation fields are in a
bitfield, we cannot explicitly address them in an atomic load. Instead
we load the respective containing bytes in the descriptor and mask them
appropriately.

Signed-off-by: Jasper Tran O'Leary <jtranoleary@google.com>

Reviewed by:	markj
MFC after:	2 weeks
Differential Revision:	https://reviews.freebsd.org/D50384
This commit is contained in:
Jasper Tran O'Leary
2025-05-20 23:34:44 +00:00
committed by Mark Johnston
parent ac62b97951
commit b044f12537
4 changed files with 52 additions and 20 deletions
+2 -1
View File
@@ -377,7 +377,8 @@ struct stats {
_Static_assert(sizeof(struct stats) == 16,
"gve: bad admin queue struct length");
/* These are control path types for PTYPE which are the same as the data path
/*
* These are control path types for PTYPE which are the same as the data path
* types.
*/
struct gve_ptype_entry {
+16 -4
View File
@@ -208,9 +208,14 @@ _Static_assert(sizeof(struct gve_tx_metadata_dqo) == 12,
#define GVE_TX_METADATA_VERSION_DQO 0
/*
 * Used to access the generation bit within a TX completion descriptor.
 *
 * The generation flag is declared as a bit-field, so it cannot be the
 * operand of an atomic load. Instead, the byte that contains it is loaded
 * and masked:
 *   - GVE_TX_DESC_DQO_GEN_BYTE_OFFSET is the offset, in bytes from the
 *     start of the descriptor, of the byte holding the generation bit.
 *   - GVE_TX_DESC_DQO_GEN_BIT_MASK selects the generation bit (bit 7)
 *     within that byte.
 *
 * NOTE(review): these values presume the compiler lays out the bit-fields
 * of struct gve_tx_compl_desc_dqo so that `generation` is the top bit of
 * byte 1 -- confirm for every supported ABI.
 */
#define GVE_TX_DESC_DQO_GEN_BYTE_OFFSET 1
#define GVE_TX_DESC_DQO_GEN_BIT_MASK 0x80
/* TX completion descriptor */
struct gve_tx_compl_desc_dqo {
/* For types 0-4 this is the TX queue ID associated with this
/*
* For types 0-4 this is the TX queue ID associated with this
* completion.
*/
uint16_t id:11;
@@ -222,12 +227,14 @@ struct gve_tx_compl_desc_dqo {
/* Flipped by HW to notify the descriptor is populated. */
uint16_t generation:1;
union {
/* For descriptor completions, this is the last index fetched
/*
* For descriptor completions, this is the last index fetched
* by HW + 1.
*/
__le16 tx_head;
/* For packet completions, this is the completion tag set on the
/*
* For packet completions, this is the completion tag set on the
* TX packet descriptors.
*/
__le16 completion_tag;
@@ -258,6 +265,10 @@ struct gve_rx_desc_dqo {
_Static_assert(sizeof(struct gve_rx_desc_dqo) == 32,
"gve: bad dqo desc struct length");
/*
 * Used to access the generation bit within an RX completion descriptor.
 *
 * The generation flag cannot be addressed directly by an atomic load, so
 * the byte that contains it is loaded and masked:
 *   - GVE_RX_DESC_DQO_GEN_BYTE_OFFSET is the offset, in bytes from the
 *     start of the descriptor, of the byte holding the generation bit.
 *   - GVE_RX_DESC_DQO_GEN_BIT_MASK selects the generation bit (bit 6)
 *     within that byte.
 *
 * NOTE(review): these values presume a particular bit-field layout of
 * struct gve_rx_compl_desc_dqo -- confirm against the struct definition
 * for every supported ABI.
 */
#define GVE_RX_DESC_DQO_GEN_BYTE_OFFSET 5
#define GVE_RX_DESC_DQO_GEN_BIT_MASK 0x40
/* Descriptor for HW to notify SW of new packets received on RX queue. */
struct gve_rx_compl_desc_dqo {
/* Must be 1 */
@@ -266,7 +277,8 @@ struct gve_rx_compl_desc_dqo {
/* Packet originated from this system rather than the network. */
uint8_t loopback:1;
/* Set when IPv6 packet contains a destination options header or routing
/*
* Set when IPv6 packet contains a destination options header or routing
* header.
*/
uint8_t ipv6_ex_add:1;
+17 -7
View File
@@ -962,6 +962,19 @@ gve_rx_dqo_qpl(struct gve_priv *priv, struct gve_rx_ring *rx,
rx->ctx = (struct gve_rx_ctx){};
}
/*
 * Fetch the generation bit of an RX completion descriptor.
 *
 * `desc` points at the first byte of the descriptor. The byte holding the
 * generation bit is read with acquire semantics so that later reads of the
 * descriptor's other fields cannot be reordered ahead of this one.
 *
 * Returns 1 if the generation bit is set and 0 otherwise.
 */
static uint8_t
gve_rx_get_gen_bit(uint8_t *desc)
{
	uint8_t *gen_byte_addr = desc + GVE_RX_DESC_DQO_GEN_BYTE_OFFSET;

	/* The acquire load orders this read before the rest of the descriptor. */
	if ((atomic_load_acq_8(gen_byte_addr) &
	    GVE_RX_DESC_DQO_GEN_BIT_MASK) == 0)
		return (0);

	return (1);
}
static bool
gve_rx_cleanup_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, int budget)
{
@@ -971,17 +984,14 @@ gve_rx_cleanup_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, int budget)
NET_EPOCH_ASSERT();
while (work_done < budget) {
bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, rx->dqo.compl_ring_mem.map,
bus_dmamap_sync(rx->dqo.compl_ring_mem.tag,
rx->dqo.compl_ring_mem.map,
BUS_DMASYNC_POSTREAD);
compl_desc = &rx->dqo.compl_ring[rx->dqo.tail];
if (compl_desc->generation == rx->dqo.cur_gen_bit)
if (gve_rx_get_gen_bit((uint8_t *)compl_desc) ==
rx->dqo.cur_gen_bit)
break;
/*
* Prevent generation bit from being read after the rest of the
* descriptor.
*/
atomic_thread_fence_acq();
rx->cnt++;
rx->dqo.tail = (rx->dqo.tail + 1) & rx->dqo.mask;
+17 -8
View File
@@ -1029,6 +1029,19 @@ gve_clear_tx_ring_dqo(struct gve_priv *priv, int i)
gve_tx_clear_compl_ring_dqo(tx);
}
/*
 * Fetch the generation bit of a TX completion descriptor.
 *
 * `desc` points at the first byte of the descriptor. The byte holding the
 * generation bit is read with acquire semantics so that later reads of the
 * descriptor's other fields cannot be reordered ahead of this one.
 *
 * Returns 1 if the generation bit is set and 0 otherwise.
 */
static uint8_t
gve_tx_get_gen_bit(uint8_t *desc)
{
	uint8_t *gen_byte_addr = desc + GVE_TX_DESC_DQO_GEN_BYTE_OFFSET;

	/* The acquire load orders this read before the rest of the descriptor. */
	if ((atomic_load_acq_8(gen_byte_addr) &
	    GVE_TX_DESC_DQO_GEN_BIT_MASK) == 0)
		return (0);

	return (1);
}
static bool
gve_tx_cleanup_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, int budget)
{
@@ -1041,20 +1054,16 @@ gve_tx_cleanup_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, int budget)
uint16_t type;
while (work_done < budget) {
bus_dmamap_sync(tx->dqo.compl_ring_mem.tag, tx->dqo.compl_ring_mem.map,
bus_dmamap_sync(tx->dqo.compl_ring_mem.tag,
tx->dqo.compl_ring_mem.map,
BUS_DMASYNC_POSTREAD);
compl_desc = &tx->dqo.compl_ring[tx->dqo.compl_head];
if (compl_desc->generation == tx->dqo.cur_gen_bit)
if (gve_tx_get_gen_bit((uint8_t *)compl_desc) ==
tx->dqo.cur_gen_bit)
break;
/*
* Prevent generation bit from being read after the rest of the
* descriptor.
*/
atomic_thread_fence_acq();
type = compl_desc->type;
if (type == GVE_COMPL_TYPE_DQO_DESC) {
/* This is the last descriptor fetched by HW plus one */
tx_head = le16toh(compl_desc->tx_head);