iflib: accurately count bytes/segments for TSO

When using software-based ifnet counters, iflib has not taken
TSO into account when reporting the segments and bytes sent.
As a result, it underreports NIC bandwidth by a small percentage,
and undercounts sent segments by a large factor.
Fix this by calculating the number of added segments the NIC
will send, and adding the header size multiplied by that number
to arrive at a correct accounting of segments and bytes sent.
This makes these software counters directly comparable to
hardware counters.

Doing this requires moving the calculation into iflib_encap() where
we have already parsed the packet and know the header size, MSS, etc.

Differential Revision: https://reviews.freebsd.org/D56338
Sponsored by: Netflix
This commit is contained in:
Andrew Gallatin
2026-04-17 11:45:22 -04:00
parent 52e7958702
commit 3fade68cfd
+20 -8
View File
@@ -3478,7 +3478,7 @@ iflib_ether_pad(device_t dev, struct mbuf **m_head, uint16_t min_frame_size)
} }
static int static int
iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) iflib_encap(iflib_txq_t txq, struct mbuf **m_headp, int *obytes, int *opkts)
{ {
if_ctx_t ctx; if_ctx_t ctx;
if_shared_ctx_t sctx; if_shared_ctx_t sctx;
@@ -3663,6 +3663,20 @@ iflib_encap(iflib_txq_t txq, struct mbuf **m_headp)
*/ */
txq->ift_pidx = pi.ipi_new_pidx; txq->ift_pidx = pi.ipi_new_pidx;
txq->ift_npending += pi.ipi_ndescs; txq->ift_npending += pi.ipi_ndescs;
/*
* Update packets / bytes sent
*/
if (flags & IFLIB_TSO) {
int hlen = pi.ipi_ehdrlen + pi.ipi_ip_hlen + pi.ipi_tcp_hlen;
int tsolen = pi.ipi_len - hlen;
int nsegs = (tsolen + pi.ipi_tso_segsz - 1) / pi.ipi_tso_segsz;
*obytes += tsolen + nsegs * hlen;
*opkts += nsegs;
} else {
*obytes += pi.ipi_len;
*opkts += 1;
}
} else { } else {
*m_headp = m_head = iflib_remove_mbuf(txq); *m_headp = m_head = iflib_remove_mbuf(txq);
if (err == EFBIG) { if (err == EFBIG) {
@@ -3918,7 +3932,7 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx)
skipped++; skipped++;
continue; continue;
} }
err = iflib_encap(txq, mp); err = iflib_encap(txq, mp, &bytes_sent, &pkt_sent);
if (__predict_false(err)) { if (__predict_false(err)) {
/* no room - bail out */ /* no room - bail out */
if (err == ENOBUFS) if (err == ENOBUFS)
@@ -3927,10 +3941,8 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx)
/* we can't send this packet - skip it */ /* we can't send this packet - skip it */
continue; continue;
} }
pkt_sent++;
m = *mp; m = *mp;
DBG_COUNTER_INC(tx_sent); DBG_COUNTER_INC(tx_sent);
bytes_sent += m->m_pkthdr.len;
mcast_sent += !!(m->m_flags & M_MCAST); mcast_sent += !!(m->m_flags & M_MCAST);
if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))) if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)))
@@ -7136,6 +7148,8 @@ iflib_debugnet_transmit(if_t ifp, struct mbuf *m)
if_ctx_t ctx; if_ctx_t ctx;
iflib_txq_t txq; iflib_txq_t txq;
int error; int error;
int bytes_sent = 0;
int pkt_sent = 0;
ctx = if_getsoftc(ifp); ctx = if_getsoftc(ifp);
if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
@@ -7143,7 +7157,7 @@ iflib_debugnet_transmit(if_t ifp, struct mbuf *m)
return (EBUSY); return (EBUSY);
txq = &ctx->ifc_txqs[0]; txq = &ctx->ifc_txqs[0];
error = iflib_encap(txq, &m); error = iflib_encap(txq, &m, &bytes_sent, &pkt_sent);
if (error == 0) if (error == 0)
(void)iflib_txd_db_check(txq, true); (void)iflib_txd_db_check(txq, true);
return (error); return (error);
@@ -7209,10 +7223,8 @@ iflib_simple_transmit(if_t ifp, struct mbuf *m)
txq = iflib_simple_select_queue(ctx, m); txq = iflib_simple_select_queue(ctx, m);
mtx_lock(&txq->ift_mtx); mtx_lock(&txq->ift_mtx);
error = iflib_encap(txq, &m); error = iflib_encap(txq, &m, &bytes_sent, &pkt_sent);
if (error == 0) { if (error == 0) {
pkt_sent++;
bytes_sent += m->m_pkthdr.len;
mcast_sent += !!(m->m_flags & M_MCAST); mcast_sent += !!(m->m_flags & M_MCAST);
(void)iflib_txd_db_check(txq, true); (void)iflib_txd_db_check(txq, true);
} else { } else {