92ba9b2fe5
Added RoCE support for BCM576xx controllers with below changes, 1. Update the BAR offsets for handling BCM576xx controllers. Use the values populated by the L2 driver for getting the Doorbell offsets. 2. Use msn index instead of tail to pull psn table entry. 3. Temporarily disable dbr pacing feature untill it is fully implemented. 4. Add support for 400G speed. Reviewed by: ssaxena Differential Revision: https://reviews.freebsd.org/D54521 MFC after: 3 days
540 lines
14 KiB
C
540 lines
14 KiB
C
/*
|
|
* Copyright (c) 2024, Broadcom. All rights reserved. The term
|
|
* Broadcom refers to Broadcom Limited and/or its subsidiaries.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
|
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
|
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
|
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
|
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
|
|
* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*
|
|
*/
|
|
|
|
#ifndef __BNXT_RE_MAIN_H__
|
|
#define __BNXT_RE_MAIN_H__
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/queue.h>
|
|
|
|
#include <infiniband/driver.h>
|
|
#include <infiniband/endian.h>
|
|
#include <infiniband/udma_barrier.h>
|
|
|
|
#include <inttypes.h>
|
|
#include <math.h>
|
|
#include <pthread.h>
|
|
#include <stdatomic.h>
|
|
#include <stdbool.h>
|
|
#include <stddef.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
#include "abi.h"
|
|
#include "list.h"
|
|
#include "memory.h"
|
|
|
|
#define DEV "bnxt_re : "
|
|
#define BNXT_RE_UD_QP_STALL 0x400000
|
|
|
|
#define CHIP_NUM_57508 0x1750
|
|
#define CHIP_NUM_57504 0x1751
|
|
#define CHIP_NUM_57502 0x1752
|
|
#define CHIP_NUM_58818 0xd818
|
|
#define CHIP_NUM_57608 0x1760
|
|
|
|
#define BNXT_NSEC_PER_SEC 1000000000UL
|
|
|
|
struct bnxt_re_chip_ctx {
|
|
__u16 chip_num;
|
|
__u8 chip_rev;
|
|
__u8 chip_metal;
|
|
bool chip_is_gen_p5_thor2;
|
|
};
|
|
|
|
#define BNXT_RE_MAP_WC 0x1000
|
|
#define BNXT_RE_DBR_PAGE 0x2000
|
|
#define BNXT_RE_DB_RECOVERY_PAGE 0x3000
|
|
|
|
#define BNXT_RE_DB_REPLAY_YIELD_CNT 256
|
|
#define BNXT_RE_DB_KEY_INVALID -1
|
|
#define BNXT_RE_MAX_DO_PACING 0xFFFF
|
|
#define bnxt_re_wm_barrier() udma_to_device_barrier()
|
|
#define unlikely(x) __builtin_expect(!!(x), 0)
|
|
#define likely(x) __builtin_expect(!!(x), 1)
|
|
|
|
#define CNA(v, d) \
|
|
{ .vendor = PCI_VENDOR_ID_##v, \
|
|
.device = d }
|
|
#define BNXT_RE_DEFINE_CNA_TABLE(_name) \
|
|
static const struct { \
|
|
unsigned vendor; \
|
|
unsigned device; \
|
|
} _name[]
|
|
|
|
struct bnxt_re_dpi {
|
|
__u32 dpindx;
|
|
__u32 wcdpi;
|
|
__u64 *dbpage;
|
|
__u64 *wcdbpg;
|
|
};
|
|
|
|
struct bnxt_re_pd {
|
|
struct ibv_pd ibvpd;
|
|
uint32_t pdid;
|
|
};
|
|
|
|
struct xorshift32_state {
|
|
uint32_t seed;
|
|
};
|
|
|
|
struct bnxt_re_cq {
|
|
struct ibv_cq ibvcq;
|
|
struct bnxt_re_list_head sfhead;
|
|
struct bnxt_re_list_head rfhead;
|
|
struct bnxt_re_list_head prev_cq_head;
|
|
struct bnxt_re_context *cntx;
|
|
struct bnxt_re_queue *cqq;
|
|
struct bnxt_re_dpi *udpi;
|
|
struct bnxt_re_mem *resize_mem;
|
|
struct bnxt_re_mem *mem;
|
|
struct bnxt_re_list_node dbnode;
|
|
uint64_t shadow_db_key;
|
|
uint32_t cqe_sz;
|
|
uint32_t cqid;
|
|
struct xorshift32_state rand;
|
|
int deferred_arm_flags;
|
|
bool first_arm;
|
|
bool deferred_arm;
|
|
bool phase;
|
|
uint8_t dbr_lock;
|
|
void *cq_page;
|
|
};
|
|
|
|
struct bnxt_re_push_buffer {
|
|
__u64 *pbuf; /*push wc buffer */
|
|
__u64 *wqe; /* hwqe addresses */
|
|
__u64 *ucdb;
|
|
uint32_t st_idx;
|
|
uint32_t qpid;
|
|
uint16_t wcdpi;
|
|
uint16_t nbit;
|
|
uint32_t tail;
|
|
};
|
|
|
|
enum bnxt_re_push_info_mask {
|
|
BNXT_RE_PUSH_SIZE_MASK = 0x1FUL,
|
|
BNXT_RE_PUSH_SIZE_SHIFT = 0x18UL
|
|
};
|
|
|
|
struct bnxt_re_db_ppp_hdr {
|
|
struct bnxt_re_db_hdr db_hdr;
|
|
__u64 rsv_psz_pidx;
|
|
};
|
|
|
|
struct bnxt_re_push_rec {
|
|
struct bnxt_re_dpi *udpi;
|
|
struct bnxt_re_push_buffer *pbuf;
|
|
__u32 pbmap; /* only 16 bits in use */
|
|
};
|
|
|
|
struct bnxt_re_wrid {
|
|
uint64_t wrid;
|
|
int next_idx;
|
|
uint32_t bytes;
|
|
uint8_t sig;
|
|
uint8_t slots;
|
|
uint8_t wc_opcd;
|
|
};
|
|
|
|
struct bnxt_re_qpcap {
|
|
uint32_t max_swr;
|
|
uint32_t max_rwr;
|
|
uint32_t max_ssge;
|
|
uint32_t max_rsge;
|
|
uint32_t max_inline;
|
|
uint8_t sqsig;
|
|
uint8_t is_atomic_cap;
|
|
};
|
|
|
|
struct bnxt_re_srq {
|
|
struct ibv_srq ibvsrq;
|
|
struct ibv_srq_attr cap;
|
|
uint32_t srqid;
|
|
struct bnxt_re_context *uctx;
|
|
struct bnxt_re_queue *srqq;
|
|
struct bnxt_re_wrid *srwrid;
|
|
struct bnxt_re_dpi *udpi;
|
|
struct bnxt_re_mem *mem;
|
|
int start_idx;
|
|
int last_idx;
|
|
struct bnxt_re_list_node dbnode;
|
|
uint64_t shadow_db_key;
|
|
struct xorshift32_state rand;
|
|
uint8_t dbr_lock;
|
|
bool arm_req;
|
|
void *srq_page;
|
|
};
|
|
|
|
struct bnxt_re_joint_queue {
|
|
struct bnxt_re_context *cntx;
|
|
struct bnxt_re_queue *hwque;
|
|
struct bnxt_re_wrid *swque;
|
|
uint32_t start_idx;
|
|
uint32_t last_idx;
|
|
};
|
|
|
|
struct bnxt_re_qp {
|
|
struct ibv_qp ibvqp;
|
|
struct bnxt_re_qpcap cap;
|
|
struct bnxt_re_context *cntx;
|
|
struct bnxt_re_chip_ctx *cctx;
|
|
struct bnxt_re_joint_queue *jsqq;
|
|
struct bnxt_re_joint_queue *jrqq;
|
|
struct bnxt_re_dpi *udpi;
|
|
uint64_t wqe_cnt;
|
|
uint16_t mtu;
|
|
uint16_t qpst;
|
|
uint8_t qptyp;
|
|
uint8_t qpmode;
|
|
uint8_t push_st_en;
|
|
uint8_t ppp_idx;
|
|
uint32_t sq_psn;
|
|
uint32_t sq_msn;
|
|
uint32_t qpid;
|
|
uint16_t max_push_sz;
|
|
uint8_t sq_dbr_lock;
|
|
uint8_t rq_dbr_lock;
|
|
struct xorshift32_state rand;
|
|
struct bnxt_re_list_node snode;
|
|
struct bnxt_re_list_node rnode;
|
|
struct bnxt_re_srq *srq;
|
|
struct bnxt_re_cq *rcq;
|
|
struct bnxt_re_cq *scq;
|
|
struct bnxt_re_mem *mem;/* at cl 6 */
|
|
struct bnxt_re_list_node dbnode;
|
|
uint64_t sq_shadow_db_key;
|
|
uint64_t rq_shadow_db_key;
|
|
};
|
|
|
|
struct bnxt_re_mr {
|
|
struct ibv_mr vmr;
|
|
};
|
|
|
|
struct bnxt_re_ah {
|
|
struct ibv_ah ibvah;
|
|
struct bnxt_re_pd *pd;
|
|
uint32_t avid;
|
|
};
|
|
|
|
struct bnxt_re_dev {
|
|
struct verbs_device vdev;
|
|
struct ibv_device_attr devattr;
|
|
uint32_t pg_size;
|
|
uint32_t cqe_size;
|
|
uint32_t max_cq_depth;
|
|
uint8_t abi_version;
|
|
};
|
|
|
|
struct bnxt_re_res_list {
|
|
struct bnxt_re_list_head head;
|
|
pthread_spinlock_t lock;
|
|
};
|
|
|
|
struct bnxt_re_context {
|
|
struct ibv_context ibvctx;
|
|
struct bnxt_re_dev *rdev;
|
|
struct bnxt_re_chip_ctx *cctx;
|
|
uint64_t comp_mask;
|
|
struct bnxt_re_dpi udpi;
|
|
uint32_t dev_id;
|
|
uint32_t max_qp;
|
|
uint32_t max_srq;
|
|
uint32_t modes;
|
|
void *shpg;
|
|
pthread_mutex_t shlock;
|
|
struct bnxt_re_push_rec *pbrec;
|
|
void *dbr_page;
|
|
void *bar_map;
|
|
struct bnxt_re_res_list qp_dbr_res;
|
|
struct bnxt_re_res_list cq_dbr_res;
|
|
struct bnxt_re_res_list srq_dbr_res;
|
|
void *db_recovery_page;
|
|
struct ibv_comp_channel *dbr_ev_chan;
|
|
struct ibv_cq *dbr_cq;
|
|
pthread_t dbr_thread;
|
|
uint64_t replay_cnt;
|
|
};
|
|
|
|
struct bnxt_re_pacing_data {
|
|
uint32_t do_pacing;
|
|
uint32_t pacing_th;
|
|
uint32_t dev_err_state;
|
|
uint32_t alarm_th;
|
|
};
|
|
|
|
/* Chip context related functions */
|
|
bool _is_chip_gen_p5(struct bnxt_re_chip_ctx *cctx);
|
|
bool _is_chip_a0(struct bnxt_re_chip_ctx *cctx);
|
|
bool _is_chip_thor2(struct bnxt_re_chip_ctx *cctx);
|
|
bool _is_chip_gen_p5_thor2(struct bnxt_re_chip_ctx *cctx);
|
|
|
|
/* DB ring functions used internally*/
|
|
void bnxt_re_ring_rq_db(struct bnxt_re_qp *qp);
|
|
void bnxt_re_ring_sq_db(struct bnxt_re_qp *qp);
|
|
void bnxt_re_ring_srq_arm(struct bnxt_re_srq *srq);
|
|
void bnxt_re_ring_srq_db(struct bnxt_re_srq *srq);
|
|
void bnxt_re_ring_cq_db(struct bnxt_re_cq *cq);
|
|
void bnxt_re_ring_cq_arm_db(struct bnxt_re_cq *cq, uint8_t aflag);
|
|
|
|
void bnxt_re_ring_pstart_db(struct bnxt_re_qp *qp,
|
|
struct bnxt_re_push_buffer *pbuf);
|
|
void bnxt_re_ring_pend_db(struct bnxt_re_qp *qp,
|
|
struct bnxt_re_push_buffer *pbuf);
|
|
void bnxt_re_fill_push_wcb(struct bnxt_re_qp *qp,
|
|
struct bnxt_re_push_buffer *pbuf,
|
|
uint32_t idx);
|
|
|
|
void bnxt_re_fill_ppp(struct bnxt_re_push_buffer *pbuf,
|
|
struct bnxt_re_qp *qp, uint8_t len, uint32_t idx);
|
|
int bnxt_re_init_pbuf_list(struct bnxt_re_context *cntx);
|
|
void bnxt_re_destroy_pbuf_list(struct bnxt_re_context *cntx);
|
|
struct bnxt_re_push_buffer *bnxt_re_get_pbuf(uint8_t *push_st_en,
|
|
uint8_t ppp_idx,
|
|
struct bnxt_re_context *cntx);
|
|
void bnxt_re_put_pbuf(struct bnxt_re_context *cntx,
|
|
struct bnxt_re_push_buffer *pbuf);
|
|
|
|
void bnxt_re_db_recovery(struct bnxt_re_context *cntx);
|
|
void *bnxt_re_dbr_thread(void *arg);
|
|
bool _is_db_drop_recovery_enable(struct bnxt_re_context *cntx);
|
|
int bnxt_re_poll_kernel_cq(struct bnxt_re_cq *cq);
|
|
extern int bnxt_single_threaded;
|
|
extern int bnxt_dyn_debug;
|
|
|
|
#define bnxt_re_trace(fmt, ...) \
|
|
{ \
|
|
if (bnxt_dyn_debug) \
|
|
fprintf(stderr, fmt, ##__VA_ARGS__); \
|
|
}
|
|
|
|
/* pointer conversion functions*/
|
|
static inline struct bnxt_re_dev *to_bnxt_re_dev(struct ibv_device *ibvdev)
|
|
{
|
|
return container_of(ibvdev, struct bnxt_re_dev, vdev);
|
|
}
|
|
|
|
static inline struct bnxt_re_context *to_bnxt_re_context(
|
|
struct ibv_context *ibvctx)
|
|
{
|
|
return container_of(ibvctx, struct bnxt_re_context, ibvctx);
|
|
}
|
|
|
|
static inline struct bnxt_re_pd *to_bnxt_re_pd(struct ibv_pd *ibvpd)
|
|
{
|
|
return container_of(ibvpd, struct bnxt_re_pd, ibvpd);
|
|
}
|
|
|
|
static inline struct bnxt_re_cq *to_bnxt_re_cq(struct ibv_cq *ibvcq)
|
|
{
|
|
return container_of(ibvcq, struct bnxt_re_cq, ibvcq);
|
|
}
|
|
|
|
static inline struct bnxt_re_qp *to_bnxt_re_qp(struct ibv_qp *ibvqp)
|
|
{
|
|
return container_of(ibvqp, struct bnxt_re_qp, ibvqp);
|
|
}
|
|
|
|
static inline struct bnxt_re_srq *to_bnxt_re_srq(struct ibv_srq *ibvsrq)
|
|
{
|
|
return container_of(ibvsrq, struct bnxt_re_srq, ibvsrq);
|
|
}
|
|
|
|
static inline struct bnxt_re_ah *to_bnxt_re_ah(struct ibv_ah *ibvah)
|
|
{
|
|
return container_of(ibvah, struct bnxt_re_ah, ibvah);
|
|
}
|
|
|
|
/* CQE manipulations */
|
|
#define bnxt_re_get_cqe_sz() (sizeof(struct bnxt_re_req_cqe) + \
|
|
sizeof(struct bnxt_re_bcqe))
|
|
#define bnxt_re_get_sqe_hdr_sz() (sizeof(struct bnxt_re_bsqe) + \
|
|
sizeof(struct bnxt_re_send))
|
|
#define bnxt_re_get_srqe_hdr_sz() (sizeof(struct bnxt_re_brqe) + \
|
|
sizeof(struct bnxt_re_srqe))
|
|
#define bnxt_re_get_srqe_sz() (sizeof(struct bnxt_re_brqe) + \
|
|
sizeof(struct bnxt_re_srqe) + \
|
|
BNXT_RE_MAX_INLINE_SIZE)
|
|
#define bnxt_re_is_cqe_valid(valid, phase) \
|
|
(((valid) & BNXT_RE_BCQE_PH_MASK) == (phase))
|
|
|
|
static inline void bnxt_re_change_cq_phase(struct bnxt_re_cq *cq)
|
|
{
|
|
if (!cq->cqq->head)
|
|
cq->phase = !(cq->phase & BNXT_RE_BCQE_PH_MASK);
|
|
}
|
|
|
|
static inline void *bnxt_re_get_swqe(struct bnxt_re_joint_queue *jqq,
|
|
uint32_t *wqe_idx)
|
|
{
|
|
if (wqe_idx)
|
|
*wqe_idx = jqq->start_idx;
|
|
return &jqq->swque[jqq->start_idx];
|
|
}
|
|
|
|
static inline void bnxt_re_jqq_mod_start(struct bnxt_re_joint_queue *jqq,
|
|
uint32_t idx)
|
|
{
|
|
jqq->start_idx = jqq->swque[idx].next_idx;
|
|
}
|
|
|
|
static inline void bnxt_re_jqq_mod_last(struct bnxt_re_joint_queue *jqq,
|
|
uint32_t idx)
|
|
{
|
|
jqq->last_idx = jqq->swque[idx].next_idx;
|
|
}
|
|
|
|
static inline uint32_t bnxt_re_init_depth(uint32_t ent, uint64_t cmask)
|
|
{
|
|
return cmask & BNXT_RE_COMP_MASK_UCNTX_POW2_DISABLED ?
|
|
ent : roundup_pow_of_two(ent);
|
|
}
|
|
|
|
static inline uint32_t bnxt_re_get_diff(uint64_t cmask)
|
|
{
|
|
return cmask & BNXT_RE_COMP_MASK_UCNTX_RSVD_WQE_DISABLED ?
|
|
0 : BNXT_RE_FULL_FLAG_DELTA;
|
|
}
|
|
|
|
static inline int bnxt_re_calc_wqe_sz(int nsge)
|
|
{
|
|
/* This is used for both sq and rq. In case hdr size differs
|
|
* in future move to individual functions.
|
|
*/
|
|
return sizeof(struct bnxt_re_sge) * nsge + bnxt_re_get_sqe_hdr_sz();
|
|
}
|
|
|
|
/* Helper function to copy to push buffers */
|
|
static inline void bnxt_re_copy_data_to_pb(struct bnxt_re_push_buffer *pbuf,
|
|
uint8_t offset, uint32_t idx)
|
|
{
|
|
__u64 *src;
|
|
__u64 *dst;
|
|
int indx;
|
|
|
|
for (indx = 0; indx < idx; indx++) {
|
|
dst = (__u64 *)(pbuf->pbuf + 2 * (indx + offset));
|
|
src = (__u64 *)pbuf->wqe[indx];
|
|
iowrite64(dst, *src);
|
|
|
|
dst++;
|
|
src++;
|
|
iowrite64(dst, *src);
|
|
}
|
|
}
|
|
|
|
static inline int bnxt_re_dp_spin_init(struct bnxt_spinlock *lock, int pshared, int need_lock)
|
|
{
|
|
lock->in_use = 0;
|
|
lock->need_lock = need_lock;
|
|
return pthread_spin_init(&lock->lock, PTHREAD_PROCESS_PRIVATE);
|
|
}
|
|
|
|
static inline int bnxt_re_dp_spin_destroy(struct bnxt_spinlock *lock)
|
|
{
|
|
return pthread_spin_destroy(&lock->lock);
|
|
}
|
|
|
|
static inline int bnxt_spin_lock(struct bnxt_spinlock *lock)
|
|
{
|
|
if (lock->need_lock)
|
|
return pthread_spin_lock(&lock->lock);
|
|
|
|
if (unlikely(lock->in_use)) {
|
|
fprintf(stderr, "*** ERROR: multithreading violation ***\n"
|
|
"You are running a multithreaded application but\n"
|
|
"you set BNXT_SINGLE_THREADED=1. Please unset it.\n");
|
|
abort();
|
|
} else {
|
|
lock->in_use = 1;
|
|
/* This fence is not at all correct, but it increases the */
|
|
/* chance that in_use is detected by another thread without */
|
|
/* much runtime cost. */
|
|
atomic_thread_fence(memory_order_acq_rel);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline int bnxt_spin_unlock(struct bnxt_spinlock *lock)
|
|
{
|
|
if (lock->need_lock)
|
|
return pthread_spin_unlock(&lock->lock);
|
|
|
|
lock->in_use = 0;
|
|
return 0;
|
|
}
|
|
|
|
static void timespec_sub(const struct timespec *a, const struct timespec *b,
|
|
struct timespec *res)
|
|
{
|
|
res->tv_sec = a->tv_sec - b->tv_sec;
|
|
res->tv_nsec = a->tv_nsec - b->tv_nsec;
|
|
if (res->tv_nsec < 0) {
|
|
res->tv_sec--;
|
|
res->tv_nsec += BNXT_NSEC_PER_SEC;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Function waits in a busy loop for a given nano seconds
|
|
* The maximum wait period allowed is less than one second
|
|
*/
|
|
static inline void bnxt_re_sub_sec_busy_wait(uint32_t nsec)
|
|
{
|
|
struct timespec start, cur, res;
|
|
|
|
if (nsec >= BNXT_NSEC_PER_SEC)
|
|
return;
|
|
|
|
if (clock_gettime(CLOCK_REALTIME, &start)) {
|
|
fprintf(stderr, "%s: failed to get time : %d",
|
|
__func__, errno);
|
|
return;
|
|
}
|
|
|
|
while (1) {
|
|
if (clock_gettime(CLOCK_REALTIME, &cur)) {
|
|
fprintf(stderr, "%s: failed to get time : %d",
|
|
__func__, errno);
|
|
return;
|
|
}
|
|
|
|
timespec_sub(&cur, &start, &res);
|
|
if (res.tv_nsec >= nsec)
|
|
break;
|
|
}
|
|
}
|
|
|
|
#define BNXT_RE_HW_RETX(a) ((a)->comp_mask & BNXT_RE_COMP_MASK_UCNTX_HW_RETX_ENABLED)
|
|
#define bnxt_re_dp_spin_lock(lock) bnxt_spin_lock(lock)
|
|
#define bnxt_re_dp_spin_unlock(lock) bnxt_spin_unlock(lock)
|
|
|
|
#endif
|