Consistently provide ffs/fls using builtins

The compiler builtin ffs/ctz functions produce optimized instruction
sequences when possible, and otherwise fall back to calling a function
provided by the compiler run-time library. We have slowly shifted our
platforms to take advantage of these builtins in 60645781d6 (arm64),
1c76d3a9fb (arm), 9e319462a0 (powerpc, partial).

Some platforms still rely on the libkern implementations of these
functions, namely riscv, powerpc (ffs*, flsll), and i386 (ffsll and
flsll). These routines are slow, as they perform a
linear search for the bit in question. Even on platforms lacking
dedicated bit-search instructions, such as riscv, the compiler library
will provide better-optimized routines, e.g. by using binary search.

Consolidate all definitions of these functions (whether currently using
builtins or not) to libkern.h. This should result in equivalent or
better performing routines in all cases.

One wart in all of this is the set of existing HAVE_INLINE_F*** macros,
which we use in a few places to conditionally avoid the slow libkern
routines. These aren't easily removed in one commit. For now, provide
these defines unconditionally, but mark them for removal after
subsequent cleanup.

Removal of the now unused libkern routines will follow in the next
commit.

Reviewed by:	dougm, imp (previous version)
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D40698
This commit is contained in:
Mitchell Horne
2023-07-06 14:45:39 -03:00
parent e8404a72c5
commit a89262079e
9 changed files with 57 additions and 238 deletions
-37
View File
@@ -127,43 +127,6 @@ enable_intr(void)
__asm __volatile("sti"); __asm __volatile("sti");
} }
#ifdef _KERNEL
#define HAVE_INLINE_FFS
#define ffs(x) __builtin_ffs(x)
#define HAVE_INLINE_FFSL
#define ffsl(x) __builtin_ffsl(x)
#define HAVE_INLINE_FFSLL
#define ffsll(x) __builtin_ffsll(x)
#define HAVE_INLINE_FLS
static __inline __pure2 int
fls(int mask)
{
return (mask == 0 ? mask : (int)bsrl((u_int)mask) + 1);
}
#define HAVE_INLINE_FLSL
static __inline __pure2 int
flsl(long mask)
{
return (mask == 0 ? mask : (int)bsrq((u_long)mask) + 1);
}
#define HAVE_INLINE_FLSLL
static __inline __pure2 int
flsll(long long mask)
{
return (flsl((long)mask));
}
#endif /* _KERNEL */
static __inline void static __inline void
halt(void) halt(void)
{ {
-57
View File
@@ -183,63 +183,6 @@ void cpu_reset (void) __attribute__((__noreturn__));
extern int arm_dcache_align; extern int arm_dcache_align;
extern int arm_dcache_align_mask; extern int arm_dcache_align_mask;
#define HAVE_INLINE_FFS
static __inline __pure2 int
ffs(int mask)
{
return (__builtin_ffs(mask));
}
#define HAVE_INLINE_FFSL
static __inline __pure2 int
ffsl(long mask)
{
return (__builtin_ffsl(mask));
}
#define HAVE_INLINE_FFSLL
static __inline __pure2 int
ffsll(long long mask)
{
return (__builtin_ffsll(mask));
}
#define HAVE_INLINE_FLS
static __inline __pure2 int
fls(int mask)
{
return (mask == 0 ? 0 :
8 * sizeof(mask) - __builtin_clz((u_int)mask));
}
#define HAVE_INLINE_FLSL
static __inline __pure2 int
flsl(long mask)
{
return (mask == 0 ? 0 :
8 * sizeof(mask) - __builtin_clzl((u_long)mask));
}
#define HAVE_INLINE_FLSLL
static __inline __pure2 int
flsll(long long mask)
{
return (mask == 0 ? 0 :
8 * sizeof(mask) - __builtin_clzll((unsigned long long)mask));
}
#else /* !_KERNEL */ #else /* !_KERNEL */
static __inline void static __inline void
-58
View File
@@ -37,64 +37,6 @@ breakpoint(void)
} }
#ifdef _KERNEL #ifdef _KERNEL
#define HAVE_INLINE_FFS
static __inline __pure2 int
ffs(int mask)
{
return (__builtin_ffs(mask));
}
#define HAVE_INLINE_FFSL
static __inline __pure2 int
ffsl(long mask)
{
return (__builtin_ffsl(mask));
}
#define HAVE_INLINE_FFSLL
static __inline __pure2 int
ffsll(long long mask)
{
return (__builtin_ffsll(mask));
}
#define HAVE_INLINE_FLS
static __inline __pure2 int
fls(int mask)
{
return (mask == 0 ? 0 :
8 * sizeof(mask) - __builtin_clz((u_int)mask));
}
#define HAVE_INLINE_FLSL
static __inline __pure2 int
flsl(long mask)
{
return (mask == 0 ? 0 :
8 * sizeof(mask) - __builtin_clzl((u_long)mask));
}
#define HAVE_INLINE_FLSLL
static __inline __pure2 int
flsll(long long mask)
{
return (mask == 0 ? 0 :
8 * sizeof(mask) - __builtin_clzll((unsigned long long)mask));
}
#include <machine/armreg.h> #include <machine/armreg.h>
void pan_enable(void); void pan_enable(void);
-2
View File
@@ -141,8 +141,6 @@ kern/imgact_aout.c optional compat_aout
kern/subr_sfbuf.c standard kern/subr_sfbuf.c standard
libkern/divdi3.c standard libkern/divdi3.c standard
libkern/divmoddi4.c standard libkern/divmoddi4.c standard
libkern/ffsll.c standard
libkern/flsll.c standard
libkern/memcmp.c standard libkern/memcmp.c standard
libkern/memset.c standard libkern/memset.c standard
libkern/moddi3.c standard libkern/moddi3.c standard
-4
View File
@@ -189,10 +189,6 @@ libkern/ashrdi3.c optional powerpc | powerpcspe
libkern/bcopy.c standard libkern/bcopy.c standard
libkern/cmpdi2.c optional powerpc | powerpcspe libkern/cmpdi2.c optional powerpc | powerpcspe
libkern/divdi3.c optional powerpc | powerpcspe libkern/divdi3.c optional powerpc | powerpcspe
libkern/ffs.c standard
libkern/ffsl.c standard
libkern/ffsll.c standard
libkern/flsll.c standard
libkern/lshrdi3.c optional powerpc | powerpcspe libkern/lshrdi3.c optional powerpc | powerpcspe
libkern/memcmp.c standard libkern/memcmp.c standard
libkern/memset.c standard libkern/memset.c standard
-6
View File
@@ -23,12 +23,6 @@ kern/subr_dummy_vdso_tc.c standard
kern/subr_intr.c standard kern/subr_intr.c standard
kern/subr_physmem.c standard kern/subr_physmem.c standard
libkern/bcopy.c standard libkern/bcopy.c standard
libkern/ffs.c standard
libkern/ffsl.c standard
libkern/ffsll.c standard
libkern/fls.c standard
libkern/flsl.c standard
libkern/flsll.c standard
libkern/memcmp.c standard libkern/memcmp.c standard
libkern/memset.c standard libkern/memset.c standard
libkern/strcmp.c standard libkern/strcmp.c standard
-42
View File
@@ -180,48 +180,6 @@ sfence(void)
__asm __volatile("sfence" : : : "memory"); __asm __volatile("sfence" : : : "memory");
} }
#ifdef _KERNEL
#define HAVE_INLINE_FFS
static __inline __pure2 int
ffs(int mask)
{
/*
* Note that gcc-2's builtin ffs would be used if we didn't declare
* this inline or turn off the builtin. The builtin is faster but
* broken in gcc-2.4.5 and slower but working in gcc-2.5 and later
* versions.
*/
return (mask == 0 ? mask : (int)bsfl((u_int)mask) + 1);
}
#define HAVE_INLINE_FFSL
static __inline __pure2 int
ffsl(long mask)
{
return (ffs((int)mask));
}
#define HAVE_INLINE_FLS
static __inline __pure2 int
fls(int mask)
{
return (mask == 0 ? mask : (int)bsrl((u_int)mask) + 1);
}
#define HAVE_INLINE_FLSL
static __inline __pure2 int
flsl(long mask)
{
return (fls((int)mask));
}
#endif /* _KERNEL */
static __inline void static __inline void
halt(void) halt(void)
{ {
-14
View File
@@ -258,20 +258,6 @@ get_pcpu(void)
return (ret); return (ret);
} }
#define HAVE_INLINE_FLS
static __inline __pure2 int
fls(int mask)
{
return (mask ? 32 - __builtin_clz(mask) : 0);
}
#define HAVE_INLINE_FLSL
static __inline __pure2 int
flsl(long mask)
{
return (mask ? (8 * sizeof(long) - __builtin_clzl(mask)) : 0);
}
/* "NOP" operations to signify priorities to the kernel. */ /* "NOP" operations to signify priorities to the kernel. */
static __inline void static __inline void
nop_prio_vlow(void) nop_prio_vlow(void)
+57 -18
View File
@@ -132,24 +132,63 @@ void arc4rand(void *, u_int, int);
int timingsafe_bcmp(const void *, const void *, size_t); int timingsafe_bcmp(const void *, const void *, size_t);
void *bsearch(const void *, const void *, size_t, void *bsearch(const void *, const void *, size_t,
size_t, int (*)(const void *, const void *)); size_t, int (*)(const void *, const void *));
#ifndef HAVE_INLINE_FFS
int ffs(int); /*
#endif * MHTODO: remove the 'HAVE_INLINE_FOO' defines once use of these flags has
#ifndef HAVE_INLINE_FFSL * been purged everywhere. For now we provide them unconditionally.
int ffsl(long); */
#endif #define HAVE_INLINE_FFS
#ifndef HAVE_INLINE_FFSLL #define HAVE_INLINE_FFSL
int ffsll(long long); #define HAVE_INLINE_FFSLL
#endif #define HAVE_INLINE_FLS
#ifndef HAVE_INLINE_FLS #define HAVE_INLINE_FLSL
int fls(int); #define HAVE_INLINE_FLSLL
#endif
#ifndef HAVE_INLINE_FLSL static __inline __pure2 int
int flsl(long); ffs(int mask)
#endif {
#ifndef HAVE_INLINE_FLSLL
int flsll(long long); return (__builtin_ffs((u_int)mask));
#endif }
static __inline __pure2 int
ffsl(long mask)
{
return (__builtin_ffsl((u_long)mask));
}
static __inline __pure2 int
ffsll(long long mask)
{
return (__builtin_ffsll((unsigned long long)mask));
}
static __inline __pure2 int
fls(int mask)
{
return (mask == 0 ? 0 :
8 * sizeof(mask) - __builtin_clz((u_int)mask));
}
static __inline __pure2 int
flsl(long mask)
{
return (mask == 0 ? 0 :
8 * sizeof(mask) - __builtin_clzl((u_long)mask));
}
static __inline __pure2 int
flsll(long long mask)
{
return (mask == 0 ? 0 :
8 * sizeof(mask) - __builtin_clzll((unsigned long long)mask));
}
#define bitcount64(x) __bitcount64((uint64_t)(x)) #define bitcount64(x) __bitcount64((uint64_t)(x))
#define bitcount32(x) __bitcount32((uint32_t)(x)) #define bitcount32(x) __bitcount32((uint32_t)(x))
#define bitcount16(x) __bitcount16((uint16_t)(x)) #define bitcount16(x) __bitcount16((uint16_t)(x))