gve: Add support for 4k RX Buffers when using DQO queue formats

This change adds support for using 4K RX Buffers when using DQO queue
formats when a boot-time tunable flag is set to true by the user.
When this flag is enabled, the driver will use 4K RX Buffer size either
when HW LRO is enabled or mtu > 2048.

Signed-off-by: Vee Agarwal <veethebee@google.com>

Reviewed by:	markj, ziaee
MFC after:	2 weeks
Differential Revision:	https://reviews.freebsd.org/D50786
This commit is contained in:
Vee Agarwal
2025-06-13 17:52:25 +00:00
committed by Mark Johnston
parent 67c5db938c
commit 71702df612
7 changed files with 84 additions and 17 deletions
+8
View File
@@ -230,6 +230,14 @@ The default value is 0, which means hardware LRO is enabled by default.
The software LRO stack in the kernel is always used.
This sysctl variable needs to be set before loading the driver, using
.Xr loader.conf 5 .
.It Va hw.gve.allow_4k_rx_buffers
Setting this boot-time tunable to 1 enables support for 4K RX Buffers.
The default value is 0, which means 2K RX Buffers will be used.
4K RX Buffers are only supported on DQO_RDA and DQO_QPL queue formats.
When enabled, 4K RX Buffers will be used either when HW LRO is enabled
or mtu is greater than 2048.
This sysctl variable needs to be set before loading the driver, using
.Xr loader.conf 5 .
.It Va dev.gve.X.num_rx_queues and dev.gve.X.num_tx_queues
Run-time tunables that represent the number of currently used RX/TX queues.
The default value is the max number of RX/TX queues the device can support.
+20
View File
@@ -65,6 +65,7 @@
#define ADMINQ_SIZE PAGE_SIZE
#define GVE_DEFAULT_RX_BUFFER_SIZE 2048
#define GVE_4K_RX_BUFFER_SIZE_DQO 4096
/* Each RX bounce buffer page can fit two packet buffers. */
#define GVE_DEFAULT_RX_BUFFER_OFFSET (PAGE_SIZE / 2)
@@ -84,6 +85,11 @@
static MALLOC_DEFINE(M_GVE, "gve", "gve allocations");
_Static_assert(MCLBYTES == GVE_DEFAULT_RX_BUFFER_SIZE,
"gve: bad MCLBYTES length");
_Static_assert(MJUMPAGESIZE >= GVE_4K_RX_BUFFER_SIZE_DQO,
"gve: bad MJUMPAGESIZE length");
struct gve_dma_handle {
bus_addr_t bus_addr;
void *cpu_addr;
@@ -633,6 +639,7 @@ struct gve_priv {
/* The index of tx queue that the timer service will check on its next invocation */
uint16_t check_tx_queue_idx;
uint16_t rx_buf_size_dqo;
};
static inline bool
@@ -666,6 +673,18 @@ gve_is_qpl(struct gve_priv *priv)
priv->queue_format == GVE_DQO_QPL_FORMAT);
}
/*
 * Report whether the DQO RX buffer size currently recorded in priv is
 * the 4K size (GVE_4K_RX_BUFFER_SIZE_DQO).
 */
static inline bool
gve_is_4k_rx_buf(struct gve_priv *priv)
{
	const uint16_t cur_buf_size = priv->rx_buf_size_dqo;

	return (cur_buf_size == GVE_4K_RX_BUFFER_SIZE_DQO);
}
/*
 * Pick the mbuf cluster size matching the configured DQO RX buffer size:
 * MJUMPAGESIZE when 4K RX buffers are in use, MCLBYTES otherwise.
 */
static inline bus_size_t
gve_rx_dqo_mbuf_segment_size(struct gve_priv *priv)
{
	if (gve_is_4k_rx_buf(priv))
		return (MJUMPAGESIZE);

	return (MCLBYTES);
}
/* Defined in gve_main.c */
void gve_schedule_reset(struct gve_priv *priv);
int gve_adjust_tx_queues(struct gve_priv *priv, uint16_t new_queue_cnt);
@@ -746,6 +765,7 @@ bool gve_timestamp_valid(int64_t *timestamp_sec);
/* Sysctl functions defined in gve_sysctl.c */
extern bool gve_disable_hw_lro;
extern bool gve_allow_4k_rx_buffers;
extern char gve_queue_format[8];
extern char gve_version[8];
void gve_setup_sysctl(struct gve_priv *priv);
+4 -1
View File
@@ -296,7 +296,6 @@ gve_adminq_create_rx_queue(struct gve_priv *priv, uint32_t queue_index)
.ntfy_id = htobe32(rx->com.ntfy_id),
.queue_resources_addr = htobe64(qres_dma->bus_addr),
.rx_ring_size = htobe16(priv->rx_desc_cnt),
.packet_buffer_size = htobe16(GVE_DEFAULT_RX_BUFFER_SIZE),
};
if (gve_is_gqi(priv)) {
@@ -308,6 +307,8 @@ gve_adminq_create_rx_queue(struct gve_priv *priv, uint32_t queue_index)
htobe32(queue_index);
cmd.create_rx_queue.queue_page_list_id =
htobe32((rx->com.qpl)->id);
cmd.create_rx_queue.packet_buffer_size =
htobe16(GVE_DEFAULT_RX_BUFFER_SIZE);
} else {
cmd.create_rx_queue.queue_page_list_id =
htobe32(GVE_RAW_ADDRESSING_QPL_ID);
@@ -320,6 +321,8 @@ gve_adminq_create_rx_queue(struct gve_priv *priv, uint32_t queue_index)
cmd.create_rx_queue.enable_rsc =
!!((if_getcapenable(priv->ifp) & IFCAP_LRO) &&
!gve_disable_hw_lro);
cmd.create_rx_queue.packet_buffer_size =
htobe16(priv->rx_buf_size_dqo);
}
return (gve_adminq_execute_cmd(priv, &cmd));
+6 -2
View File
@@ -59,8 +59,6 @@
*/
#define GVE_RX_DQO_MIN_PENDING_BUFS 128
#define GVE_DQ_NUM_FRAGS_IN_PAGE (PAGE_SIZE / GVE_DEFAULT_RX_BUFFER_SIZE)
/*
* gve_rx_qpl_buf_id_dqo's 11 bit wide buf_id field limits the total
* number of pages per QPL to 2048.
@@ -330,4 +328,10 @@ struct gve_rx_compl_desc_dqo {
_Static_assert(sizeof(struct gve_rx_compl_desc_dqo) == 32,
"gve: bad dqo desc struct length");
/*
 * Number of DQO RX buffers of the currently configured size
 * (priv->rx_buf_size_dqo) that fit in a single page.
 */
static inline uint8_t
gve_get_dq_num_frags_in_page(struct gve_priv *priv)
{
	const uint16_t frag_size = priv->rx_buf_size_dqo;

	return (PAGE_SIZE / frag_size);
}
#endif /* _GVE_DESC_DQO_H_ */
+27 -3
View File
@@ -35,7 +35,7 @@
#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.4\n"
#define GVE_VERSION_MAJOR 1
#define GVE_VERSION_MINOR 3
#define GVE_VERSION_SUB 4
#define GVE_VERSION_SUB 5
#define GVE_DEFAULT_RX_COPYBREAK 256
@@ -382,12 +382,27 @@ gve_adjust_ring_sizes(struct gve_priv *priv, uint16_t new_desc_cnt, bool is_rx)
return (0);
}
/*
 * Choose the RX buffer size to use for the given MTU.
 *
 * Returns GVE_4K_RX_BUFFER_SIZE_DQO only when the queue format is not
 * GQI, the allow_4k_rx_buffers tunable is set, and either hardware LRO
 * is not disabled or the MTU exceeds the default 2K buffer size.
 * Returns GVE_DEFAULT_RX_BUFFER_SIZE in every other case.
 */
static int
gve_get_dqo_rx_buf_size(struct gve_priv *priv, uint16_t mtu)
{
	bool needs_4k;

	/* GQI formats, or a cleared tunable, always keep the 2K default. */
	if (gve_is_gqi(priv) || !gve_allow_4k_rx_buffers)
		return (GVE_DEFAULT_RX_BUFFER_SIZE);

	needs_4k = !gve_disable_hw_lro || mtu > GVE_DEFAULT_RX_BUFFER_SIZE;
	if (needs_4k)
		return (GVE_4K_RX_BUFFER_SIZE_DQO);

	return (GVE_DEFAULT_RX_BUFFER_SIZE);
}
static int
gve_set_mtu(if_t ifp, uint32_t new_mtu)
{
struct gve_priv *priv = if_getsoftc(ifp);
const uint32_t max_problem_range = 8227;
const uint32_t min_problem_range = 7822;
uint16_t new_rx_buf_size = gve_get_dqo_rx_buf_size(priv, new_mtu);
int err;
if ((new_mtu > priv->max_mtu) || (new_mtu < ETHERMIN)) {
@@ -402,9 +417,10 @@ gve_set_mtu(if_t ifp, uint32_t new_mtu)
* in throughput.
*/
if (!gve_is_gqi(priv) && !gve_disable_hw_lro &&
new_mtu >= min_problem_range && new_mtu <= max_problem_range) {
new_mtu >= min_problem_range && new_mtu <= max_problem_range &&
new_rx_buf_size != GVE_4K_RX_BUFFER_SIZE_DQO) {
device_printf(priv->dev,
"Cannot set to MTU to %d within the range [%d, %d] while hardware LRO is enabled\n",
"Cannot set to MTU to %d within the range [%d, %d] while HW LRO is enabled and not using 4k RX Buffers\n",
new_mtu, min_problem_range, max_problem_range);
return (EINVAL);
}
@@ -414,6 +430,13 @@ gve_set_mtu(if_t ifp, uint32_t new_mtu)
if (bootverbose)
device_printf(priv->dev, "MTU set to %d\n", new_mtu);
if_setmtu(ifp, new_mtu);
/* Need to re-alloc RX queues if RX buffer size changed */
if (!gve_is_gqi(priv) &&
new_rx_buf_size != priv->rx_buf_size_dqo) {
gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues);
priv->rx_buf_size_dqo = new_rx_buf_size;
gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
}
} else {
device_printf(priv->dev, "Failed to set MTU to %d\n", new_mtu);
}
@@ -1064,6 +1087,7 @@ gve_attach(device_t dev)
if (err != 0)
goto abort;
priv->rx_buf_size_dqo = gve_get_dqo_rx_buf_size(priv, priv->max_mtu);
err = gve_alloc_rings(priv);
if (err != 0)
goto abort;
+15 -11
View File
@@ -140,15 +140,17 @@ gve_rx_alloc_ring_dqo(struct gve_priv *priv, int i)
return (0);
}
bus_size_t max_seg_size = gve_rx_dqo_mbuf_segment_size(priv);
err = bus_dma_tag_create(
bus_get_dma_tag(priv->dev), /* parent */
1, 0, /* alignment, bounds */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
MCLBYTES, /* maxsize */
max_seg_size, /* maxsize */
1, /* nsegments */
MCLBYTES, /* maxsegsize */
max_seg_size, /* maxsegsize */
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockarg */
@@ -317,7 +319,8 @@ gve_rx_post_new_mbuf_dqo(struct gve_rx_ring *rx, int how)
}
SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);
buf->mbuf = m_getcl(how, MT_DATA, M_PKTHDR);
bus_size_t segment_size = gve_rx_dqo_mbuf_segment_size(rx->com.priv);
buf->mbuf = m_getjcl(how, MT_DATA, M_PKTHDR, segment_size);
if (__predict_false(!buf->mbuf)) {
err = ENOMEM;
counter_enter();
@@ -325,7 +328,7 @@ gve_rx_post_new_mbuf_dqo(struct gve_rx_ring *rx, int how)
counter_exit();
goto abort_with_buf;
}
buf->mbuf->m_len = MCLBYTES;
buf->mbuf->m_len = segment_size;
err = bus_dmamap_load_mbuf_sg(rx->dqo.buf_dmatag, buf->dmamap,
buf->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
@@ -371,7 +374,7 @@ gve_rx_post_qpl_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf,
bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
BUS_DMASYNC_PREREAD);
desc->buf_addr = htole64(page_dma_handle->bus_addr +
frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);
frag_num * rx->com.priv->rx_buf_size_dqo);
buf->num_nic_frags++;
gve_rx_advance_head_dqo(rx);
@@ -430,7 +433,7 @@ gve_rx_post_new_dqo_qpl_buf(struct gve_rx_ring *rx)
}
gve_rx_post_qpl_buf_dqo(rx, buf, buf->next_idx);
if (buf->next_idx == GVE_DQ_NUM_FRAGS_IN_PAGE - 1)
if (buf->next_idx == gve_get_dq_num_frags_in_page(rx->com.priv) - 1)
buf->next_idx = 0;
else
buf->next_idx++;
@@ -742,7 +745,7 @@ gve_get_cpu_addr_for_qpl_buf(struct gve_rx_ring *rx,
int page_idx = buf - rx->dqo.bufs;
void *va = rx->com.qpl->dmas[page_idx].cpu_addr;
va = (char *)va + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);
va = (char *)va + (buf_frag_num * rx->com.priv->rx_buf_size_dqo);
return (va);
}
@@ -753,15 +756,16 @@ gve_rx_add_clmbuf_to_ctx(struct gve_rx_ring *rx,
{
void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);
struct mbuf *mbuf;
bus_size_t segment_size = gve_rx_dqo_mbuf_segment_size(rx->com.priv);
if (ctx->mbuf_tail == NULL) {
mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, segment_size);
if (mbuf == NULL)
return (ENOMEM);
ctx->mbuf_head = mbuf;
ctx->mbuf_tail = mbuf;
} else {
mbuf = m_getcl(M_NOWAIT, MT_DATA, 0);
mbuf = m_getjcl(M_NOWAIT, MT_DATA, 0, segment_size);
if (mbuf == NULL)
return (ENOMEM);
ctx->mbuf_tail->m_next = mbuf;
@@ -809,7 +813,7 @@ gve_rx_add_extmbuf_to_ctx(struct gve_rx_ring *rx,
page_idx = buf - rx->dqo.bufs;
page = rx->com.qpl->pages[page_idx];
page_addr = rx->com.qpl->dmas[page_idx].cpu_addr;
va = (char *)page_addr + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);
va = (char *)page_addr + (buf_frag_num * rx->com.priv->rx_buf_size_dqo);
/*
* Grab an extra ref to the page so that gve_mextadd_free
@@ -855,7 +859,7 @@ gve_rx_dqo_qpl(struct gve_priv *priv, struct gve_rx_ring *rx,
}
buf = &rx->dqo.bufs[buf_id];
if (__predict_false(buf->num_nic_frags == 0 ||
buf_frag_num > GVE_DQ_NUM_FRAGS_IN_PAGE - 1)) {
buf_frag_num > gve_get_dq_num_frags_in_page(priv) - 1)) {
device_printf(priv->dev, "Spurious compl for buf id %d on rxq %d "
"with buf_frag_num %d and num_nic_frags %d, issuing reset\n",
buf_id, rx->com.id, buf_frag_num, buf->num_nic_frags);
+4
View File
@@ -37,6 +37,10 @@ bool gve_disable_hw_lro = false;
SYSCTL_BOOL(_hw_gve, OID_AUTO, disable_hw_lro, CTLFLAG_RDTUN,
&gve_disable_hw_lro, 0, "Controls if hardware LRO is used");
/*
 * Backing variable for the hw.gve.allow_4k_rx_buffers tunable.
 * Defaults to false, in which case the 2K RX buffer size is used.
 */
bool gve_allow_4k_rx_buffers = false;
SYSCTL_BOOL(_hw_gve, OID_AUTO, allow_4k_rx_buffers, CTLFLAG_RDTUN,
&gve_allow_4k_rx_buffers, 0, "Controls if 4K RX Buffers are allowed");
char gve_queue_format[8];
SYSCTL_STRING(_hw_gve, OID_AUTO, queue_format, CTLFLAG_RD,
&gve_queue_format, 0, "Queue format being used by the iface");