ifnet: make if_index global
Now that ifindex is static to if.c, we can unvirtualize it. For the lifetime of an ifnet, its index never changes. To avoid leaking foreign interfaces, the net.link.generic.system.ifcount sysctl and the ifnet_byindex() KPI filter their returned value on curvnet. Since if_vmove() no longer changes the if_index, inline ifindex_alloc() and ifindex_free() into if_alloc() and if_free() respectively. API-wise, the only change is that the minimum interface index can now be greater than 1. Holes in interface indexes were always allowed. Reviewed by: kp Differential revision: https://reviews.freebsd.org/D33672
This commit is contained in:
+81
-125
@@ -311,19 +311,30 @@ VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */
|
|||||||
VNET_DEFINE(struct ifgrouphead, ifg_head);
|
VNET_DEFINE(struct ifgrouphead, ifg_head);
|
||||||
|
|
||||||
/* Table of ifnet by index. */
|
/* Table of ifnet by index. */
|
||||||
VNET_DEFINE_STATIC(int, if_index);
|
static int if_index;
|
||||||
#define V_if_index VNET(if_index)
|
static int if_indexlim = 8;
|
||||||
VNET_DEFINE_STATIC(int, if_indexlim) = 8;
|
static struct ifnet **ifindex_table;
|
||||||
#define V_if_indexlim VNET(if_indexlim)
|
|
||||||
VNET_DEFINE_STATIC(struct ifnet **, ifindex_table);
|
|
||||||
#define V_ifindex_table VNET(ifindex_table)
|
|
||||||
|
|
||||||
SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system,
|
SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system,
|
||||||
CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
|
CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
|
||||||
"Variables global to all interfaces");
|
"Variables global to all interfaces");
|
||||||
SYSCTL_INT(_net_link_generic_system, IFMIB_IFCOUNT, ifcount,
|
static int
|
||||||
CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(if_index), 0,
|
sysctl_ifcount(SYSCTL_HANDLER_ARGS)
|
||||||
"Number of configured interfaces");
|
{
|
||||||
|
int rv = 0;
|
||||||
|
|
||||||
|
IFNET_RLOCK();
|
||||||
|
for (int i = 1; i <= if_index; i++)
|
||||||
|
if (ifindex_table[i] != NULL &&
|
||||||
|
ifindex_table[i]->if_vnet == curvnet)
|
||||||
|
rv = i;
|
||||||
|
IFNET_RUNLOCK();
|
||||||
|
|
||||||
|
return (sysctl_handle_int(oidp, &rv, 0, req));
|
||||||
|
}
|
||||||
|
SYSCTL_PROC(_net_link_generic_system, IFMIB_IFCOUNT, ifcount,
|
||||||
|
CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RD, NULL, 0, sysctl_ifcount, "I",
|
||||||
|
"Maximum known interface index");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The global network interface list (V_ifnet) and related state (such as
|
* The global network interface list (V_ifnet) and related state (such as
|
||||||
@@ -352,13 +363,19 @@ MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
|
|||||||
struct ifnet *
|
struct ifnet *
|
||||||
ifnet_byindex(u_int idx)
|
ifnet_byindex(u_int idx)
|
||||||
{
|
{
|
||||||
|
struct ifnet *ifp;
|
||||||
|
|
||||||
NET_EPOCH_ASSERT();
|
NET_EPOCH_ASSERT();
|
||||||
|
|
||||||
if (__predict_false(idx > V_if_index))
|
if (__predict_false(idx > if_index))
|
||||||
return (NULL);
|
return (NULL);
|
||||||
|
|
||||||
return (ck_pr_load_ptr(&V_ifindex_table[idx]));
|
ifp = ck_pr_load_ptr(&ifindex_table[idx]);
|
||||||
|
|
||||||
|
if (curvnet != NULL && ifp != NULL && ifp->if_vnet != curvnet)
|
||||||
|
ifp = NULL;
|
||||||
|
|
||||||
|
return (ifp);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ifnet *
|
struct ifnet *
|
||||||
@@ -374,58 +391,6 @@ ifnet_byindex_ref(u_int idx)
|
|||||||
return (ifp);
|
return (ifp);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Allocate an ifindex array entry.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
ifindex_alloc(struct ifnet *ifp)
|
|
||||||
{
|
|
||||||
u_short idx;
|
|
||||||
|
|
||||||
IFNET_WLOCK();
|
|
||||||
/*
|
|
||||||
* Try to find an empty slot below V_if_index. If we fail, take the
|
|
||||||
* next slot.
|
|
||||||
*/
|
|
||||||
for (idx = 1; idx <= V_if_index; idx++) {
|
|
||||||
if (V_ifindex_table[idx] == NULL)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Catch if_index overflow. */
|
|
||||||
if (idx >= V_if_indexlim) {
|
|
||||||
struct ifnet **new, **old;
|
|
||||||
int newlim;
|
|
||||||
|
|
||||||
newlim = V_if_indexlim * 2;
|
|
||||||
new = malloc(newlim * sizeof(*new), M_IFNET, M_WAITOK | M_ZERO);
|
|
||||||
memcpy(new, V_ifindex_table, V_if_indexlim * sizeof(*new));
|
|
||||||
old = V_ifindex_table;
|
|
||||||
ck_pr_store_ptr(&V_ifindex_table, new);
|
|
||||||
V_if_indexlim = newlim;
|
|
||||||
epoch_wait_preempt(net_epoch_preempt);
|
|
||||||
free(old, M_IFNET);
|
|
||||||
}
|
|
||||||
if (idx > V_if_index)
|
|
||||||
V_if_index = idx;
|
|
||||||
|
|
||||||
ifp->if_index = idx;
|
|
||||||
ck_pr_store_ptr(&V_ifindex_table[idx], ifp);
|
|
||||||
IFNET_WUNLOCK();
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
ifindex_free(u_short idx)
|
|
||||||
{
|
|
||||||
|
|
||||||
IFNET_WLOCK_ASSERT();
|
|
||||||
|
|
||||||
ck_pr_store_ptr(&V_ifindex_table[idx], NULL);
|
|
||||||
while (V_if_index > 0 &&
|
|
||||||
V_ifindex_table[V_if_index] == NULL)
|
|
||||||
V_if_index--;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct ifaddr *
|
struct ifaddr *
|
||||||
ifaddr_byindex(u_short idx)
|
ifaddr_byindex(u_short idx)
|
||||||
{
|
{
|
||||||
@@ -447,35 +412,26 @@ ifaddr_byindex(u_short idx)
|
|||||||
* parameters.
|
* parameters.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
static void
|
||||||
|
if_init(void *arg __unused)
|
||||||
|
{
|
||||||
|
|
||||||
|
ifindex_table = malloc(if_indexlim * sizeof(*ifindex_table),
|
||||||
|
M_IFNET, M_WAITOK | M_ZERO);
|
||||||
|
}
|
||||||
|
SYSINIT(if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, if_init, NULL);
|
||||||
|
|
||||||
static void
|
static void
|
||||||
vnet_if_init(const void *unused __unused)
|
vnet_if_init(const void *unused __unused)
|
||||||
{
|
{
|
||||||
|
|
||||||
CK_STAILQ_INIT(&V_ifnet);
|
CK_STAILQ_INIT(&V_ifnet);
|
||||||
CK_STAILQ_INIT(&V_ifg_head);
|
CK_STAILQ_INIT(&V_ifg_head);
|
||||||
V_ifindex_table = malloc(V_if_indexlim * sizeof(*V_ifindex_table),
|
|
||||||
M_IFNET, M_WAITOK | M_ZERO);
|
|
||||||
vnet_if_clone_init();
|
vnet_if_clone_init();
|
||||||
}
|
}
|
||||||
VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init,
|
VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
#ifdef VIMAGE
|
|
||||||
static void
|
|
||||||
vnet_if_uninit(const void *unused __unused)
|
|
||||||
{
|
|
||||||
|
|
||||||
VNET_ASSERT(CK_STAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p "
|
|
||||||
"not empty", __func__, __LINE__, &V_ifnet));
|
|
||||||
VNET_ASSERT(CK_STAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p "
|
|
||||||
"not empty", __func__, __LINE__, &V_ifg_head));
|
|
||||||
|
|
||||||
free((caddr_t)V_ifindex_table, M_IFNET);
|
|
||||||
}
|
|
||||||
VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
|
|
||||||
vnet_if_uninit, NULL);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
if_link_ifnet(struct ifnet *ifp)
|
if_link_ifnet(struct ifnet *ifp)
|
||||||
{
|
{
|
||||||
@@ -568,6 +524,7 @@ static struct ifnet *
|
|||||||
if_alloc_domain(u_char type, int numa_domain)
|
if_alloc_domain(u_char type, int numa_domain)
|
||||||
{
|
{
|
||||||
struct ifnet *ifp;
|
struct ifnet *ifp;
|
||||||
|
u_short idx;
|
||||||
|
|
||||||
KASSERT(numa_domain <= IF_NODOM, ("numa_domain too large"));
|
KASSERT(numa_domain <= IF_NODOM, ("numa_domain too large"));
|
||||||
if (numa_domain == IF_NODOM)
|
if (numa_domain == IF_NODOM)
|
||||||
@@ -607,7 +564,37 @@ if_alloc_domain(u_char type, int numa_domain)
|
|||||||
ifp->if_get_counter = if_get_counter_default;
|
ifp->if_get_counter = if_get_counter_default;
|
||||||
ifp->if_pcp = IFNET_PCP_NONE;
|
ifp->if_pcp = IFNET_PCP_NONE;
|
||||||
|
|
||||||
ifindex_alloc(ifp);
|
/* Allocate an ifindex array entry. */
|
||||||
|
IFNET_WLOCK();
|
||||||
|
/*
|
||||||
|
* Try to find an empty slot below if_index. If we fail, take the
|
||||||
|
* next slot.
|
||||||
|
*/
|
||||||
|
for (idx = 1; idx <= if_index; idx++) {
|
||||||
|
if (ifindex_table[idx] == NULL)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Catch if_index overflow. */
|
||||||
|
if (idx >= if_indexlim) {
|
||||||
|
struct ifnet **new, **old;
|
||||||
|
int newlim;
|
||||||
|
|
||||||
|
newlim = if_indexlim * 2;
|
||||||
|
new = malloc(newlim * sizeof(*new), M_IFNET, M_WAITOK | M_ZERO);
|
||||||
|
memcpy(new, ifindex_table, if_indexlim * sizeof(*new));
|
||||||
|
old = ifindex_table;
|
||||||
|
ck_pr_store_ptr(&ifindex_table, new);
|
||||||
|
if_indexlim = newlim;
|
||||||
|
epoch_wait_preempt(net_epoch_preempt);
|
||||||
|
free(old, M_IFNET);
|
||||||
|
}
|
||||||
|
if (idx > if_index)
|
||||||
|
if_index = idx;
|
||||||
|
|
||||||
|
ifp->if_index = idx;
|
||||||
|
ck_pr_store_ptr(&ifindex_table[idx], ifp);
|
||||||
|
IFNET_WUNLOCK();
|
||||||
|
|
||||||
return (ifp);
|
return (ifp);
|
||||||
}
|
}
|
||||||
@@ -677,23 +664,18 @@ if_free(struct ifnet *ifp)
|
|||||||
* epoch and then dereferencing ifp while we perform if_free(),
|
* epoch and then dereferencing ifp while we perform if_free(),
|
||||||
* and after if_free() finished, too.
|
* and after if_free() finished, too.
|
||||||
*
|
*
|
||||||
* The reason is the VIMAGE. For some reason it was designed
|
* This early index freeing was important back when ifindex was
|
||||||
* to require all sockets drained before destroying, but not all
|
* virtualized and an interface would outlive the vnet.
|
||||||
* ifnets. A vnet destruction calls if_vmove() on ifnet, which
|
|
||||||
* causes ID change. But ID change and a possible misidentification
|
|
||||||
* of an ifnet later is a lesser problem, as it doesn't crash kernel.
|
|
||||||
* A worse problem is that removed interface may outlive the vnet it
|
|
||||||
* belongs to! The if_free_deferred() would see ifp->if_vnet freed.
|
|
||||||
*/
|
*/
|
||||||
CURVNET_SET_QUIET(ifp->if_vnet);
|
|
||||||
IFNET_WLOCK();
|
IFNET_WLOCK();
|
||||||
MPASS(V_ifindex_table[ifp->if_index] == ifp);
|
MPASS(ifindex_table[ifp->if_index] == ifp);
|
||||||
ifindex_free(ifp->if_index);
|
ck_pr_store_ptr(&ifindex_table[ifp->if_index], NULL);
|
||||||
|
while (if_index > 0 && ifindex_table[if_index] == NULL)
|
||||||
|
if_index--;
|
||||||
IFNET_WUNLOCK();
|
IFNET_WUNLOCK();
|
||||||
|
|
||||||
if (refcount_release(&ifp->if_refcount))
|
if (refcount_release(&ifp->if_refcount))
|
||||||
NET_EPOCH_CALL(if_free_deferred, &ifp->if_epoch_ctx);
|
NET_EPOCH_CALL(if_free_deferred, &ifp->if_epoch_ctx);
|
||||||
CURVNET_RESTORE();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -837,7 +819,7 @@ if_attach_internal(struct ifnet *ifp, bool vmove)
|
|||||||
struct sockaddr_dl *sdl;
|
struct sockaddr_dl *sdl;
|
||||||
struct ifaddr *ifa;
|
struct ifaddr *ifa;
|
||||||
|
|
||||||
MPASS(V_ifindex_table[ifp->if_index] == ifp);
|
MPASS(ifindex_table[ifp->if_index] == ifp);
|
||||||
|
|
||||||
#ifdef VIMAGE
|
#ifdef VIMAGE
|
||||||
ifp->if_vnet = curvnet;
|
ifp->if_vnet = curvnet;
|
||||||
@@ -1287,17 +1269,6 @@ if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
|
|||||||
if (rc != 0)
|
if (rc != 0)
|
||||||
return (rc);
|
return (rc);
|
||||||
|
|
||||||
/*
|
|
||||||
* Unlink the ifnet from ifindex_table[] in current vnet, and shrink
|
|
||||||
* the if_index for that vnet if possible.
|
|
||||||
*
|
|
||||||
* NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized,
|
|
||||||
* or we'd lock on one vnet and unlock on another.
|
|
||||||
*/
|
|
||||||
IFNET_WLOCK();
|
|
||||||
ifindex_free(ifp->if_index);
|
|
||||||
IFNET_WUNLOCK();
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Perform interface-specific reassignment tasks, if provided by
|
* Perform interface-specific reassignment tasks, if provided by
|
||||||
* the driver.
|
* the driver.
|
||||||
@@ -1309,7 +1280,6 @@ if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
|
|||||||
* Switch to the context of the target vnet.
|
* Switch to the context of the target vnet.
|
||||||
*/
|
*/
|
||||||
CURVNET_SET_QUIET(new_vnet);
|
CURVNET_SET_QUIET(new_vnet);
|
||||||
ifindex_alloc(ifp);
|
|
||||||
if_attach_internal(ifp, true);
|
if_attach_internal(ifp, true);
|
||||||
|
|
||||||
#ifdef DEV_BPF
|
#ifdef DEV_BPF
|
||||||
@@ -1945,7 +1915,6 @@ ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum)
|
|||||||
struct ifaddr *ifa_maybe = NULL;
|
struct ifaddr *ifa_maybe = NULL;
|
||||||
u_int af = addr->sa_family;
|
u_int af = addr->sa_family;
|
||||||
const char *addr_data = addr->sa_data, *cplim;
|
const char *addr_data = addr->sa_data, *cplim;
|
||||||
const struct sockaddr_dl *sdl;
|
|
||||||
|
|
||||||
NET_EPOCH_ASSERT();
|
NET_EPOCH_ASSERT();
|
||||||
/*
|
/*
|
||||||
@@ -1953,14 +1922,9 @@ ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum)
|
|||||||
* so do that if we can.
|
* so do that if we can.
|
||||||
*/
|
*/
|
||||||
if (af == AF_LINK) {
|
if (af == AF_LINK) {
|
||||||
sdl = (const struct sockaddr_dl *)addr;
|
ifp = ifnet_byindex(
|
||||||
if (sdl->sdl_index && sdl->sdl_index <= V_if_index) {
|
((const struct sockaddr_dl *)addr)->sdl_index);
|
||||||
ifp = ifnet_byindex(sdl->sdl_index);
|
return (ifp ? ifp->if_addr : NULL);
|
||||||
if (ifp == NULL)
|
|
||||||
return (NULL);
|
|
||||||
|
|
||||||
return (ifp->if_addr);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -4596,24 +4560,16 @@ DB_SHOW_COMMAND(ifnet, db_show_ifnet)
|
|||||||
|
|
||||||
DB_SHOW_ALL_COMMAND(ifnets, db_show_all_ifnets)
|
DB_SHOW_ALL_COMMAND(ifnets, db_show_all_ifnets)
|
||||||
{
|
{
|
||||||
VNET_ITERATOR_DECL(vnet_iter);
|
|
||||||
struct ifnet *ifp;
|
struct ifnet *ifp;
|
||||||
u_short idx;
|
u_short idx;
|
||||||
|
|
||||||
VNET_FOREACH(vnet_iter) {
|
for (idx = 1; idx <= if_index; idx++) {
|
||||||
CURVNET_SET_QUIET(vnet_iter);
|
ifp = ifindex_table[idx];
|
||||||
#ifdef VIMAGE
|
|
||||||
db_printf("vnet=%p\n", curvnet);
|
|
||||||
#endif
|
|
||||||
for (idx = 1; idx <= V_if_index; idx++) {
|
|
||||||
ifp = V_ifindex_table[idx];
|
|
||||||
if (ifp == NULL)
|
if (ifp == NULL)
|
||||||
continue;
|
continue;
|
||||||
db_printf( "%20s ifp=%p\n", ifp->if_xname, ifp);
|
db_printf( "%20s ifp=%p\n", ifp->if_xname, ifp);
|
||||||
if (db_pager_quit)
|
if (db_pager_quit)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
CURVNET_RESTORE();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
#endif /* DDB */
|
#endif /* DDB */
|
||||||
|
|||||||
Reference in New Issue
Block a user