tcp_hpts: move HPTS related fields from inpcb to tcpcb

This makes inpcb lighter and allows future cache line optimizations
of tcpcb.  The reason why HPTS originally used inpcb is the compressed
TIME-WAIT state (see 0d7445193a), which used to free a tcpcb while the
associated connection was still on the HPTS ring.

Reviewed by:		rrs
Differential Revision:	https://reviews.freebsd.org/D39697
This commit is contained in:
Gleb Smirnoff
2023-04-25 12:18:33 -07:00
parent 144259f673
commit c2a69e846f
11 changed files with 383 additions and 459 deletions
+1 -73
View File
@@ -145,7 +145,6 @@ struct in_conninfo {
* lock is to be obtained and SMR section exited.
*
* Key:
* (b) - Protected by the hpts lock.
* (c) - Constant after initialization
* (e) - Protected by the SMR section
* (i) - Protected by the inpcb lock
@@ -154,51 +153,6 @@ struct in_conninfo {
* (s) - Protected by another subsystem's locks
* (x) - Undefined locking
*
* Notes on the tcp_hpts:
*
* First Hpts lock order is
* 1) INP_WLOCK()
* 2) HPTS_LOCK() i.e. hpts->pmtx
*
* To insert a TCB on the hpts you *must* be holding the INP_WLOCK().
* You may check the inp->inp_in_hpts flag without the hpts lock.
* The hpts is the only one that will clear this flag holding
* only the hpts lock. This means that in your tcp_output()
* routine when you test for the inp_in_hpts flag to be 1
* it may be transitioning to 0 (by the hpts).
* That's ok since that will just mean an extra call to tcp_output
* that most likely will find the call you executed
* (when the mis-match occurred) will have put the TCB back
* on the hpts and it will return. If your
* call did not add the inp back to the hpts then you will either
* over-send or the cwnd will block you from sending more.
*
* Note you should also be holding the INP_WLOCK() when you
* call the remove from the hpts as well. Though usually
* you are either doing this from a timer, where you need and have
* the INP_WLOCK() or from destroying your TCB where again
* you should already have the INP_WLOCK().
*
* The inp_hpts_cpu, inp_hpts_cpu_set, inp_input_cpu and
* inp_input_cpu_set fields are controlled completely by
* the hpts. Do not ever set these. The inp_hpts_cpu_set
* and inp_input_cpu_set fields indicate if the hpts has
* setup the respective cpu field. It is advised if this
* field is 0, to enqueue the packet with the appropriate
* hpts_immediate() call. If the _set field is 1, then
* you may compare the inp_*_cpu field to the curcpu and
* may want to again insert onto the hpts if these fields
* are not equal (i.e. you are not on the expected CPU).
*
* A note on inp_hpts_calls and inp_input_calls, these
* flags are set when the hpts calls either the output
* or do_segment routines respectively. If the routine
* being called wants to use this, then it needs to
* clear the flag before returning. The hpts will not
* clear the flag. The flags can be used to tell if
* the hpts is the function calling the respective
* routine.
*
* A few other notes:
*
* When a read lock is held, stability of the field is guaranteed; to write
@@ -219,41 +173,15 @@ struct inpcb {
CK_LIST_ENTRY(inpcb) inp_hash_wild; /* hash table linkage */
struct rwlock inp_lock;
/* Cache line #2 (amd64) */
#define inp_start_zero inp_hpts
#define inp_start_zero inp_refcount
#define inp_zero_size (sizeof(struct inpcb) - \
offsetof(struct inpcb, inp_start_zero))
TAILQ_ENTRY(inpcb) inp_hpts; /* pacing out queue next lock(b) */
uint32_t inp_hpts_gencnt; /* XXXGL */
uint32_t inp_hpts_request; /* Current hpts request, zero if
* fits in the pacing window (i&b). */
/*
* Note the next fields are protected by a
* different lock (hpts-lock). This means that
* they must correspond in size to the smallest
* protectable bit field (uint8_t on x86, and
* other platforms potentially uint32_t?). Also
* since CPU switches can occur at different times the two
* fields can *not* be collapsed into a single bit field.
*/
#if defined(__amd64__) || defined(__i386__)
uint8_t inp_in_hpts; /* on output hpts (lock b) */
#else
uint32_t inp_in_hpts; /* on output hpts (lock b) */
#endif
volatile uint16_t inp_hpts_cpu; /* Lock (i) */
volatile uint16_t inp_irq_cpu; /* Set by LRO in behalf of or the driver */
u_int inp_refcount; /* (i) refcount */
int inp_flags; /* (i) generic IP/datagram flags */
int inp_flags2; /* (i) generic IP/datagram flags #2*/
uint8_t inp_hpts_cpu_set :1, /* on output hpts (i) */
inp_hpts_calls :1, /* (i) from output hpts */
inp_irq_cpu_set :1, /* (i) from LRO/Driver */
inp_spare_bits2 : 3;
uint8_t inp_numa_domain; /* numa domain */
void *inp_ppcb; /* (i) pointer to per-protocol pcb */
struct socket *inp_socket; /* (i) back pointer to socket */
int32_t inp_hptsslot; /* Hpts wheel slot this tcb is Lock(i&b) */
uint32_t inp_hpts_drop_reas; /* reason we are dropping the PCB (lock i&b) */
struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */
struct ucred *inp_cred; /* (c) cache of socket cred */
u_int32_t inp_flow; /* (i) IPv6 flow information */