Consistently provide ffs/fls using builtins
Use of compiler builtin ffs/ctz functions will result in optimized instruction sequences when possible, and will fall back to calling a function provided by the compiler run-time library otherwise. We have slowly shifted our platforms to take advantage of these builtins in 60645781d6 (arm64), 1c76d3a9fb (arm), and 9e319462a0 (powerpc, partial). Some platforms still rely on the implementations of these functions provided by libkern, namely riscv, powerpc (ffs*, flsll), and i386 (ffsll and flsll). These routines are slow, as they perform a linear search for the bit in question. Even on platforms lacking dedicated bit-search instructions, such as riscv, the compiler library will provide better-optimized routines, e.g. by using binary search. Consolidate all definitions of these functions (whether currently using builtins or not) to libkern.h. This should result in equivalent or better-performing routines in all cases. One wart in all of this is the set of existing HAVE_INLINE_F*** macros, which we use in a few places to conditionally avoid the slow libkern routines. These aren't easily removed in one commit. For now, provide these defines unconditionally, but mark them for removal after subsequent cleanup. Removal of the now-unused libkern routines will follow in the next commit. Reviewed by: dougm, imp (previous version) Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D40698
This commit is contained in:
@@ -127,43 +127,6 @@ enable_intr(void)
|
||||
__asm __volatile("sti");
|
||||
}
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
/*
 * Find-first-set family, mapped directly onto the compiler builtins.
 * Each HAVE_INLINE_* flag advertises that an inline definition of the
 * matching routine is provided here.
 */
#define HAVE_INLINE_FFS
|
||||
/* ffs(x): forwards its argument to __builtin_ffs(). */
#define ffs(x) __builtin_ffs(x)
|
||||
|
||||
#define HAVE_INLINE_FFSL
|
||||
/* ffsl(x): forwards its argument to __builtin_ffsl(). */
#define ffsl(x) __builtin_ffsl(x)
|
||||
|
||||
#define HAVE_INLINE_FFSLL
|
||||
/* ffsll(x): forwards its argument to __builtin_ffsll(). */
#define ffsll(x) __builtin_ffsll(x)
|
||||
|
||||
#define HAVE_INLINE_FLS
|
||||
|
||||
static __inline __pure2 int
|
||||
fls(int mask)
|
||||
{
|
||||
return (mask == 0 ? mask : (int)bsrl((u_int)mask) + 1);
|
||||
}
|
||||
|
||||
#define HAVE_INLINE_FLSL
|
||||
|
||||
static __inline __pure2 int
|
||||
flsl(long mask)
|
||||
{
|
||||
return (mask == 0 ? mask : (int)bsrq((u_long)mask) + 1);
|
||||
}
|
||||
|
||||
#define HAVE_INLINE_FLSLL
|
||||
|
||||
static __inline __pure2 int
|
||||
flsll(long long mask)
|
||||
{
|
||||
return (flsl((long)mask));
|
||||
}
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
static __inline void
|
||||
halt(void)
|
||||
{
|
||||
|
||||
@@ -183,63 +183,6 @@ void cpu_reset (void) __attribute__((__noreturn__));
|
||||
extern int arm_dcache_align;
|
||||
extern int arm_dcache_align_mask;
|
||||
|
||||
|
||||
#define HAVE_INLINE_FFS

/*
 * Find first set: 1-based position of the least significant set bit of
 * mask, or 0 when no bit is set.  Thin wrapper over __builtin_ffs().
 */
static __inline __pure2 int
ffs(int mask)
{
	const int pos = __builtin_ffs(mask);

	return (pos);
}
|
||||
|
||||
#define HAVE_INLINE_FFSL

/*
 * Find first set (long): 1-based position of the least significant set
 * bit of mask, or 0 when no bit is set.  Thin wrapper over
 * __builtin_ffsl().
 */
static __inline __pure2 int
ffsl(long mask)
{
	const int pos = __builtin_ffsl(mask);

	return (pos);
}
|
||||
|
||||
#define HAVE_INLINE_FFSLL

/*
 * Find first set (long long): 1-based position of the least significant
 * set bit of mask, or 0 when no bit is set.  Thin wrapper over
 * __builtin_ffsll().
 */
static __inline __pure2 int
ffsll(long long mask)
{
	const int pos = __builtin_ffsll(mask);

	return (pos);
}
|
||||
|
||||
#define HAVE_INLINE_FLS

/*
 * Find last set: 1-based position of the most significant set bit of
 * mask, or 0 when no bit is set.  Zero is handled up front because
 * __builtin_clz() has an undefined result for a zero argument.
 */
static __inline __pure2 int
fls(int mask)
{

	if (mask == 0)
		return (0);
	/* Bit width of the operand minus the count of leading zero bits. */
	return ((int)(8 * sizeof(mask)) - __builtin_clz((u_int)mask));
}
|
||||
|
||||
#define HAVE_INLINE_FLSL

/*
 * Find last set (long): 1-based position of the most significant set
 * bit of mask, or 0 when no bit is set.  Zero is handled up front
 * because __builtin_clzl() has an undefined result for a zero argument.
 */
static __inline __pure2 int
flsl(long mask)
{

	if (mask == 0)
		return (0);
	/* Bit width of the operand minus the count of leading zero bits. */
	return ((int)(8 * sizeof(mask)) - __builtin_clzl((u_long)mask));
}
|
||||
|
||||
#define HAVE_INLINE_FLSLL

/*
 * Find last set (long long): 1-based position of the most significant
 * set bit of mask, or 0 when no bit is set.  Zero is handled up front
 * because __builtin_clzll() has an undefined result for a zero argument.
 */
static __inline __pure2 int
flsll(long long mask)
{

	if (mask == 0)
		return (0);
	/* Bit width of the operand minus the count of leading zero bits. */
	return ((int)(8 * sizeof(mask)) -
	    __builtin_clzll((unsigned long long)mask));
}
|
||||
#else /* !_KERNEL */
|
||||
|
||||
static __inline void
|
||||
|
||||
@@ -37,64 +37,6 @@ breakpoint(void)
|
||||
}
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
#define HAVE_INLINE_FFS

/*
 * ffs() -- index (counting from 1) of the lowest set bit in mask; 0 if
 * mask is 0.  Implemented with __builtin_ffs().
 */
static __inline __pure2 int
ffs(int mask)
{
	int lowest;

	lowest = __builtin_ffs(mask);
	return (lowest);
}
|
||||
|
||||
#define HAVE_INLINE_FFSL

/*
 * ffsl() -- index (counting from 1) of the lowest set bit in mask; 0 if
 * mask is 0.  Implemented with __builtin_ffsl().
 */
static __inline __pure2 int
ffsl(long mask)
{
	int lowest;

	lowest = __builtin_ffsl(mask);
	return (lowest);
}
|
||||
|
||||
#define HAVE_INLINE_FFSLL

/*
 * ffsll() -- index (counting from 1) of the lowest set bit in mask; 0 if
 * mask is 0.  Implemented with __builtin_ffsll().
 */
static __inline __pure2 int
ffsll(long long mask)
{
	int lowest;

	lowest = __builtin_ffsll(mask);
	return (lowest);
}
|
||||
|
||||
#define HAVE_INLINE_FLS

/*
 * fls() -- index (counting from 1) of the highest set bit in mask; 0 if
 * mask is 0.  Computed as the operand's bit width minus its number of
 * leading zero bits; __builtin_clz() is never called with zero.
 */
static __inline __pure2 int
fls(int mask)
{
	const int width = (int)(8 * sizeof(mask));

	if (mask == 0)
		return (0);
	return (width - __builtin_clz((u_int)mask));
}
|
||||
|
||||
#define HAVE_INLINE_FLSL

/*
 * flsl() -- index (counting from 1) of the highest set bit in mask; 0 if
 * mask is 0.  Computed as the operand's bit width minus its number of
 * leading zero bits; __builtin_clzl() is never called with zero.
 */
static __inline __pure2 int
flsl(long mask)
{
	const int width = (int)(8 * sizeof(mask));

	if (mask == 0)
		return (0);
	return (width - __builtin_clzl((u_long)mask));
}
|
||||
|
||||
#define HAVE_INLINE_FLSLL

/*
 * flsll() -- index (counting from 1) of the highest set bit in mask; 0
 * if mask is 0.  Computed as the operand's bit width minus its number of
 * leading zero bits; __builtin_clzll() is never called with zero.
 */
static __inline __pure2 int
flsll(long long mask)
{
	const int width = (int)(8 * sizeof(mask));

	if (mask == 0)
		return (0);
	return (width - __builtin_clzll((unsigned long long)mask));
}
|
||||
|
||||
#include <machine/armreg.h>
|
||||
|
||||
void pan_enable(void);
|
||||
|
||||
@@ -141,8 +141,6 @@ kern/imgact_aout.c optional compat_aout
|
||||
kern/subr_sfbuf.c standard
|
||||
libkern/divdi3.c standard
|
||||
libkern/divmoddi4.c standard
|
||||
libkern/ffsll.c standard
|
||||
libkern/flsll.c standard
|
||||
libkern/memcmp.c standard
|
||||
libkern/memset.c standard
|
||||
libkern/moddi3.c standard
|
||||
|
||||
@@ -189,10 +189,6 @@ libkern/ashrdi3.c optional powerpc | powerpcspe
|
||||
libkern/bcopy.c standard
|
||||
libkern/cmpdi2.c optional powerpc | powerpcspe
|
||||
libkern/divdi3.c optional powerpc | powerpcspe
|
||||
libkern/ffs.c standard
|
||||
libkern/ffsl.c standard
|
||||
libkern/ffsll.c standard
|
||||
libkern/flsll.c standard
|
||||
libkern/lshrdi3.c optional powerpc | powerpcspe
|
||||
libkern/memcmp.c standard
|
||||
libkern/memset.c standard
|
||||
|
||||
@@ -23,12 +23,6 @@ kern/subr_dummy_vdso_tc.c standard
|
||||
kern/subr_intr.c standard
|
||||
kern/subr_physmem.c standard
|
||||
libkern/bcopy.c standard
|
||||
libkern/ffs.c standard
|
||||
libkern/ffsl.c standard
|
||||
libkern/ffsll.c standard
|
||||
libkern/fls.c standard
|
||||
libkern/flsl.c standard
|
||||
libkern/flsll.c standard
|
||||
libkern/memcmp.c standard
|
||||
libkern/memset.c standard
|
||||
libkern/strcmp.c standard
|
||||
|
||||
@@ -180,48 +180,6 @@ sfence(void)
|
||||
__asm __volatile("sfence" : : : "memory");
|
||||
}
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
#define HAVE_INLINE_FFS
|
||||
|
||||
static __inline __pure2 int
|
||||
ffs(int mask)
|
||||
{
|
||||
/*
|
||||
* Note that gcc-2's builtin ffs would be used if we didn't declare
|
||||
* this inline or turn off the builtin. The builtin is faster but
|
||||
* broken in gcc-2.4.5 and slower but working in gcc-2.5 and later
|
||||
* versions.
|
||||
*/
|
||||
return (mask == 0 ? mask : (int)bsfl((u_int)mask) + 1);
|
||||
}
|
||||
|
||||
#define HAVE_INLINE_FFSL
|
||||
|
||||
static __inline __pure2 int
|
||||
ffsl(long mask)
|
||||
{
|
||||
return (ffs((int)mask));
|
||||
}
|
||||
|
||||
#define HAVE_INLINE_FLS
|
||||
|
||||
static __inline __pure2 int
|
||||
fls(int mask)
|
||||
{
|
||||
return (mask == 0 ? mask : (int)bsrl((u_int)mask) + 1);
|
||||
}
|
||||
|
||||
#define HAVE_INLINE_FLSL
|
||||
|
||||
static __inline __pure2 int
|
||||
flsl(long mask)
|
||||
{
|
||||
return (fls((int)mask));
|
||||
}
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
static __inline void
|
||||
halt(void)
|
||||
{
|
||||
|
||||
@@ -258,20 +258,6 @@ get_pcpu(void)
|
||||
return (ret);
|
||||
}
|
||||
|
||||
#define HAVE_INLINE_FLS
/*
 * Find last set: 1-based position of the most significant set bit of
 * mask, or 0 when no bit is set.
 *
 * The bit width is derived from the operand type rather than spelled as
 * a literal 32, matching flsl() and the other fls-family definitions,
 * and the argument is converted to unsigned explicitly because
 * __builtin_clz() takes an unsigned int.  Zero is handled first because
 * the builtin's result is undefined for a zero argument.
 */
static __inline __pure2 int
fls(int mask)
{

	if (mask == 0)
		return (0);
	return ((int)(8 * sizeof(mask)) - __builtin_clz((unsigned int)mask));
}
|
||||
|
||||
#define HAVE_INLINE_FLSL
/*
 * Find last set (long): 1-based position of the most significant set
 * bit of mask, or 0 when no bit is set.  Zero is handled first because
 * __builtin_clzl() has an undefined result for a zero argument.
 */
static __inline __pure2 int
flsl(long mask)
{

	if (!mask)
		return (0);
	return ((int)(8 * sizeof(long)) - __builtin_clzl(mask));
}
|
||||
|
||||
/* "NOP" operations to signify priorities to the kernel. */
|
||||
static __inline void
|
||||
nop_prio_vlow(void)
|
||||
|
||||
+57
-18
@@ -132,24 +132,63 @@ void arc4rand(void *, u_int, int);
|
||||
int timingsafe_bcmp(const void *, const void *, size_t);
|
||||
void *bsearch(const void *, const void *, size_t,
|
||||
size_t, int (*)(const void *, const void *));
|
||||
/*
 * Out-of-line prototypes for the ffs/fls family.  Each prototype is
 * declared only when the matching HAVE_INLINE_* flag has not been
 * defined earlier in the translation unit.
 */
#ifndef HAVE_INLINE_FFS
|
||||
int ffs(int);
|
||||
#endif
|
||||
#ifndef HAVE_INLINE_FFSL
|
||||
int ffsl(long);
|
||||
#endif
|
||||
#ifndef HAVE_INLINE_FFSLL
|
||||
int ffsll(long long);
|
||||
#endif
|
||||
#ifndef HAVE_INLINE_FLS
|
||||
int fls(int);
|
||||
#endif
|
||||
#ifndef HAVE_INLINE_FLSL
|
||||
int flsl(long);
|
||||
#endif
|
||||
#ifndef HAVE_INLINE_FLSLL
|
||||
int flsll(long long);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* MHTODO: remove the 'HAVE_INLINE_FOO' defines once use of these flags has
|
||||
* been purged everywhere. For now we provide them unconditionally.
|
||||
*/
|
||||
#define HAVE_INLINE_FFS
|
||||
#define HAVE_INLINE_FFSL
|
||||
#define HAVE_INLINE_FFSLL
|
||||
#define HAVE_INLINE_FLS
|
||||
#define HAVE_INLINE_FLSL
|
||||
#define HAVE_INLINE_FLSLL
|
||||
|
||||
/*
 * Find first set: 1-based position of the least significant set bit of
 * mask, or 0 when no bit is set.  The value is viewed as unsigned before
 * being handed to __builtin_ffs().
 */
static __inline __pure2 int
ffs(int mask)
{
	u_int bits;

	bits = (u_int)mask;
	return (__builtin_ffs(bits));
}
|
||||
|
||||
/*
 * Find first set (long): 1-based position of the least significant set
 * bit of mask, or 0 when no bit is set.  The value is viewed as unsigned
 * before being handed to __builtin_ffsl().
 */
static __inline __pure2 int
ffsl(long mask)
{
	u_long bits;

	bits = (u_long)mask;
	return (__builtin_ffsl(bits));
}
|
||||
|
||||
/*
 * Find first set (long long): 1-based position of the least significant
 * set bit of mask, or 0 when no bit is set.  The value is viewed as
 * unsigned before being handed to __builtin_ffsll().
 */
static __inline __pure2 int
ffsll(long long mask)
{
	unsigned long long bits;

	bits = (unsigned long long)mask;
	return (__builtin_ffsll(bits));
}
|
||||
|
||||
/*
 * Find last set: 1-based position of the most significant set bit of
 * mask, or 0 when no bit is set.  __builtin_clz() is only reached for a
 * non-zero argument, since its result is undefined for zero.
 */
static __inline __pure2 int
fls(int mask)
{
	int top = 0;

	if (mask != 0)
		top = (int)(8 * sizeof(mask)) - __builtin_clz((u_int)mask);
	return (top);
}
|
||||
|
||||
/*
 * Find last set (long): 1-based position of the most significant set
 * bit of mask, or 0 when no bit is set.  __builtin_clzl() is only
 * reached for a non-zero argument, since its result is undefined for
 * zero.
 */
static __inline __pure2 int
flsl(long mask)
{
	int top = 0;

	if (mask != 0)
		top = (int)(8 * sizeof(mask)) - __builtin_clzl((u_long)mask);
	return (top);
}
|
||||
|
||||
/*
 * Find last set (long long): 1-based position of the most significant
 * set bit of mask, or 0 when no bit is set.  __builtin_clzll() is only
 * reached for a non-zero argument, since its result is undefined for
 * zero.
 */
static __inline __pure2 int
flsll(long long mask)
{
	int top = 0;

	if (mask != 0) {
		top = (int)(8 * sizeof(mask)) -
		    __builtin_clzll((unsigned long long)mask);
	}
	return (top);
}
|
||||
|
||||
/*
 * bitcountNN(x): cast the argument to the fixed-width unsigned type of
 * the matching size, then forward it to the __bitcountNN() helper
 * (defined outside this excerpt).
 */
#define bitcount64(x) __bitcount64((uint64_t)(x))
|
||||
#define bitcount32(x) __bitcount32((uint32_t)(x))
|
||||
#define bitcount16(x) __bitcount16((uint16_t)(x))
|
||||
|
||||
Reference in New Issue
Block a user