[libm] implementation of rsqrt, rsqrtf, and rsqrtl

From the PR:
The attached diff implements the inverse square root function, i.e,
rsqrt(x) = 1 / sqrt(x).  Exhaustive testing of the float version
suggests that it is correctly rounded in round-to-nearest for all
test values in the range [0x1p-127,0x1p126].
Exhaustive testing of rsqrt and rsqrtl cannot be done, but 1100M
values of x for rsqrt and 400M values for rsqrtl were tested.  All
tested values were correctly rounded.

I do not have access to LD128 (i.e., IEEE 128-bit floating point)
hardware, so the implementation of rsqrtl() is untested.

The following is a summary of changes to source code.

* lib/msun/Makefile:
  . Add s_rsqrt.c and s_rsqrtf.c to COMMON_SRCS.
  . For non-53-bit long double targets, add s_rsqrtl.c to COMMON_SRCS.
  . Add MLINKS for rsqrt.3, rsqrtf.3, and rsqrtl.3 to sqrt.3.

* lib/msun/Symbol.map:
  . Add rsqrt, rsqrtf, and rsqrtl to the Symbol map for shared libm.so.

* lib/msun/man/sqrt.3:
  . Update the sqrt.3 manual page to include information for rsqrt[fl].
  . Note, these function come from ISO C23 (and IEEE-754 2008).

* lib/msun/src/math.h:
  . Add prototypes for new functions.

* lib/msun/src/math_private.h:
  . Add _SPLIT, _FAST2SUM, _SLOW2SUM, _XADD, _MUL, and _XMUL
    macros to perform type-type arthimetic (i.e., float-float).

* src/s_rsqrt.c:
  . New file with the implementation of 'double rsqrt(double)'.
  . For 53-bit long double targets, add a weak reference for rsqrtl.

* src/s_rsqrtf.c:
  . New file with the implementation of 'float rsqrt(float)'.

* src/s_rsqrtl.c
  . New file with the implementation of 'long double rsqrt(long double)'.
    Note, the LD80 version uses bit twiddling and LD128 version is a
    straight C language implementation.  The LD128 is untested due to
    lack of hardware.

PR:	295089
MFC after:	1 week

This commit is contained in:

Steve Kargl

2026-05-08 17:06:08 +03:00

committed by

Konstantin Belousov

parent 8b03193289

commit 3085fc9d97

8 changed files with 654 additions and 5 deletions

									
										lib/msun/Makefile
									
		+3
		-3
	
												View File
												
				@@ -88,7 +88,7 @@ COMMON_SRCS= b_tgamma.c \

					s_lround.c s_lroundf.c s_lroundl.c s_modff.c \

					s_nan.c s_nearbyint.c s_nextafter.c s_nextafterf.c \

					s_nexttowardf.c s_remquo.c s_remquof.c \

					s_rint.c s_rintf.c s_round.c s_roundf.c \

					s_rint.c s_rintf.c s_round.c s_roundf.c s_rsqrt.c s_rsqrtf.c \

					s_scalbln.c s_scalbn.c s_scalbnf.c s_signbit.c \

					s_signgam.c s_significand.c s_significandf.c s_sin.c \

					s_sincos.c s_sincosf.c s_sinf.c \

				@@ -143,7 +143,7 @@ COMMON_SRCS+=	b_tgammal.c catrigl.c \

					s_fminimum_numl.c s_fminimum_mag_numl.c \

					s_frexpl.c s_logbl.c s_logl.c s_nanl.c \

					s_nextafterl.c s_nexttoward.c s_remquol.c s_rintl.c s_roundl.c \

					s_scalbnl.c s_sinl.c s_sincosl.c s_sinpil.c \

					s_scalbnl.c s_sinl.c s_sincosl.c s_sinpil.c s_rsqrtl.c \

					s_tanhl.c s_tanl.c s_tanpil.c s_truncl.c w_cabsl.c

				# Work around this warning from gcc:

				#     lib/msun/ld80/e_powl.c:275:1: error: floating constant exceeds range of

				@@ -289,7 +289,7 @@ MLINKS+=sincos.3 sincosf.3 sin.3 sincosl.3

				MLINKS+=sinh.3 sinhf.3 sinh.3 sinhl.3

				MLINKS+=sinpi.3 sinpif.3 sinpi.3 sinpil.3

				MLINKS+=sqrt.3 cbrt.3 sqrt.3 cbrtf.3 sqrt.3 cbrtl.3 sqrt.3 sqrtf.3 \

					sqrt.3 sqrtl.3

					sqrt.3 sqrtl.3 sqrt.3 rsqrt.3 sqrt.3 rsqrtf.3 sqrt.3 rsqrtl.3

				MLINKS+=tan.3 tanf.3 tan.3 tanl.3

				MLINKS+=tanh.3 tanhf.3 tanh.3 tanhl.3

				MLINKS+=tanpi.3 tanpif.3 tanpi.3 tanpil.3

lib/msun/Symbol.map

View File

@@ -344,4 +344,7 @@ FBSD_1.9 {
 	fminimum_mag_num;
 	fminimum_mag_numf;
 	fminimum_mag_numl;
 	rsqrt;
 	rsqrtf;
 	rsqrtl;
 };

lib/msun/man/sqrt.3

+51 -2

View File

@@ -25,17 +25,20 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .Dd February 15, 2020
 .Dd May 7, 2026
 .Dt SQRT 3
 .Os
 .Sh NAME
 .Nm cbrt ,
 .Nm cbrtf ,
 .Nm cbrtl ,
 .Nm rsqrt ,
 .Nm rsqrtf ,
 .Nm rsqrtl,
 .Nm sqrt ,
 .Nm sqrtf ,
 .Nm sqrtl
 .Nd cube root and square root functions
 .Nd cube root, square root, and inverse square root functions
 .Sh LIBRARY
 .Lb libm
 .Sh SYNOPSIS
@@ -47,6 +50,12 @@
 .Ft long double
 .Fn cbrtl "long double x"
 .Ft double
 .Fn rsqrt "double x"
 .Ft float
 .Fn rsqrtf "float x"
 .Ft long double
 .Fn rsqrtl "long double x"
 .Ft double
 .Fn sqrt "double x"
 .Ft float
 .Fn sqrtf "float x"
@@ -63,6 +72,15 @@ the cube root of
 .Fa x .
 .Pp
 The
 .Fn rsqrt ,
 .Fn rsqrtf ,
 and
 .Fn rsqrtl
 functions compute
 the inverse square root of
 .Fa x .
 .Pp
 The
 .Fn sqrt ,
 .Fn sqrtf ,
 and
@@ -77,6 +95,23 @@ The
 and
 .Fn cbrtl
 functions return the requested cube root.
 .Pp
 The
 .Fn rsqrt ,
 .Fn rsqrtf ,
 and
 .Fn rsqrtl
 functions return 1 divided by the square root of
 .Fa x
 unless an error occurs.
 An attempt to take the
 .Fn rsqrt
 of negative
 .Fa x
 raises an invalid exception and causes an \*(Na to be returned.
 The inverse square root of \*(Pm0 returns \*(Pm\(if and
 raises a divide-by-zero exception.
 .Pp
 The
 .Fn sqrt ,
 .Fn sqrtf ,
@@ -104,6 +139,13 @@ and
 .Fn sqrtl
 functions conform to
 .St -isoC-99 .
 The
 .Fn rsqrt ,
 .Fn rsqrtf ,
 and
 .Fn rsqrtl
 functions conform to ISO/IEC 9899:2024 ("ISO C23").
 .\" .St -isoC-24 .
 .Sh HISTORY
 The
 .Fn cbrt
@@ -120,3 +162,10 @@ The
 .Fn cbrtl
 function appeared in
 .Fx 9.0 .
 The
 .Fn rsqrt ,
 .Fn rsqrtf ,
 and
 .Fn rsqrtl
 functions appeared in
 .Fx 16.0 .

									
										lib/msun/src/math.h
									
		+3
		
												View File
												
				@@ -544,6 +544,9 @@ long double	fminimum_mag_numl(long double, long double);

				double		fminimum_num(double, double);

				float		fminimum_numf(float, float);

				long double	fminimum_numl(long double, long double);

				double		rsqrt(double);

				float		rsqrtf(float);

				long double	rsqrtl(long double);

				#endif /* __ISO_C_VISIBLE >= 2023 */

				__END_DECLS

									
										lib/msun/src/math_private.h
									
		+83
		
												View File
												
				@@ -471,6 +471,89 @@ do {								\

					(a) = __tmp;		\

				} while (0)

				/*

				 * Split x into high and low bits where CC is 0x1p(N/2) + 1 where

				 * N is rounded up for types with odd precisions.

				 *

				 * #define _CC (0x1p12F + 1)	// float

				 * #define _CC (0x1p27  + 1)	// double

				 * #define _CC (0x1p32L + 1)	// long double (LD80)

				 * #define _CC (0x1p57L + 1)	// long double (LD128)

				 */

				#define _SPLIT(x, xh, xl)		\

				do {					\

					typeof(x) __t1;			\

					__t1 = (x) * _CC;		\

					xh = __t1 + ((x) - __t1);	\

					xl = (x) - xh;			\

				} while(0)

				/*

				 * FAST2SUM requires |x| >= |y|.  x and y are full precision.

				 * Note, _2SUMF(x,y) above destroys x and y.

				 */

				#define _FAST2SUM(x, y, hi, lo)	\

				do {				\

					hi = (x) + (y);		\

					lo = (y) - (hi - (x));	\

				} while(0)

				/*

				 * SLOW2SUM does not require |x| >= |y|.  Here, x and y are full precision.

				 * The t1 temporary variable is volatile to prevent compiler optimizations.

				 * Note, _2SUM(x,y) above destroys x and y.

				 */

				#define _SLOW2SUM(x, y, hi, lo)			\

				do {						\

					volatile typeof(x) __t1;		\

					typeof(x) __t2;				\

					hi = (x) + (y);				\

					__t1 = hi - (y);			\

					__t2 = hi - __t1;			\

					lo = ((x) - __t1) + ((y) - __t2);	\

				} while(0)

				/*

				 * x and y are full precision quantities that have been split into high

				 * and low parts via the _SPLIT macro.  x and y are added to give z as

				 * high and low parts.

				 */

				#define _XADD(xh, xl, yh, yl, zh, zl)			\

				do {							\

					typeof(xh) __s1, __s2, __s3, __s4, __s5, __s6;	\

					_SLOW2SUM(xh, yh, __s1, __s2);			\

					_SLOW2SUM(xl, yl, __s3, __s4);			\

					_FAST2SUM(__s1, __s2 + __s3, __s5, __s6);	\

					_FAST2SUM(__s5, __s6 + __s4, zh, zl);		\

				} while(0)

				/*

				 * x and y are full precision quantities.  r1 and r2 are full precision

				 * high and low parts of the multiplication x * y.

				 */

				#define _MUL(x, y, r1 ,r2)				\

				do  {							\

					typeof(x) __xh, __xl, __yh, __yl;		\

					typeof(x) __t1;				\

					_SPLIT(x, __xh, __xl);				\

					_SPLIT(y, __yh, __yl);				\

					r1 = (x) * (y);					\

					__t1 = __xh * __yl + (__xh * __yh - r1);	\

					r2 = __xl * __yl + (__xl * __yh + __t1);	\

				} while(0)

				/*

				 * x and y are full precision quantities that have been split into high

				 * and low parts via the _SPLIT macro.  x and y are multiplied to give z

				 * as high and low parts.

				 */

				#define _XMUL(xh, xl, yh, yl, ph, pl)	\

				do {					\

				    _MUL(xh, yh, ph, pl);		\

				    pl += xl * yl + xl * yh + xh * yl;	\

				} while(0)

				/*

				 * Common routine to process the arguments to nan(), nanf(), and nanl().

				 */

									
										lib/msun/src/s_rsqrt.c
									
		+153
		
												View File
												
				@@ -0,0 +1,153 @@

				/*-

				 * Copyright (c) 2026 Steven G. Kargl

				 * All rights reserved.

				 *

				 * Redistribution and use in source and binary forms, with or without

				 * modification, are permitted provided that the following conditions

				 * are met:

				 * 1. Redistributions of source code must retain the above copyright

				 *    notice unmodified, this list of conditions, and the following

				 *    disclaimer.

				 * 2. Redistributions in binary form must reproduce the above copyright

				 *    notice, this list of conditions and the following disclaimer in the

				 *    documentation and/or other materials provided with the distribution.

				 *

				 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR

				 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES

				 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.

				 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,

				 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT

				 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

				 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

				 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

				 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF

				 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

				 */

				/**

				 * Compute the inverse sqrt of x, i.e., rsqrt(x) = 1 / sqrt(x).

				 *

				 * First, filter out special cases:

				 *

				 *   1. rsqrt(+-0) = +-inf, and raise FE_DIVBYZERO exception.

				 *   2. rsqrt(nan) = NaN.

				 *   3. rsqrt(+inf) returns +0.

				 *   2. rsqrt(x<0) = NaN, and raises FE_INVALID.

				 *

				 * If x is a subnormal, scale x into the normal range by x*0x1pN; while

				 * recording the exponent of the scale factor N.  Split the possibly

				 * scaled x into f*2^n with f in [0.5,1).  Set m=n or m=n-N (subnormal).

				 * If n is odd, then set f = f/2 and increase n to n+1.  Thus, f is

				 * in [0.25,1) with n even.

				 *

				 * An initial estimate of y = rqrt[f](x) is 1 / sqrt[f](x).  Exhaustive

				 * testing of rsqrtf() gave a max ULP of 1.49; while testing 500M x in

				 * [0,1000] gave a max ULP of 1.24 for rsqrt().  The value of y is then

				 * used with one iteration of Goldschmidt's algorithm:

				 *

				 *	z = x * y

				 *	h = y / 2

				 *	r = 0.5 - h * z

				 *	y = h * r + h

				 *

				 * A factor of 2 appears missing in the above, but it is included in the

				 * exponent m.

				 */

				#include <fenv.h>

				#include <float.h>

				#include "math.h"

				#include "math_private.h"

				#pragma STDC FENV_ACCESS ON

				#ifdef _CC

				#undef _CC

				#endif

				#define _CC (0x1p27 + 1)

				double

				rsqrt(double x)

				{

					volatile static const double vzero = 0;

					static const double half = 0.5;

					int hx, m, rnd;

					uint32_t lx, ux;

					double h, ph, pl, rh, rl, y, zh, zl;

					EXTRACT_WORDS(hx, lx, x);

					ux = (uint32_t)hx & 0x7fffffff;

					/* x = +-0.  Raise exception. */

					if ((ux | lx) == 0)

					    return (1 / x);

					/* x is NaN. */

					if (ux > 0x7ff00000)

					    return (x + x);

					/* x is +-inf. */

					if (ux == 0x7ff00000)

					    return (hx & 0x80000000 ? vzero / vzero : 0.);

					/* x < 0.  Raise exception. */

					if (hx < 0)

					    return (vzero / vzero);

					/*

					 * If x is subnormal, then scale it into the normal range.

					 * Split x into significand and exponent, x = f * 2^m, with

					 * f in [0.5,1) and m a biased exponent.

					 */

					m = 0;

					if (hx < 0x00100000) {		/* Subnormal */

					    x *= 0x1p54;

					    GET_HIGH_WORD(hx, x);

					    m = -54;

					}

					m += (hx >> 20) - 1022;

					hx = (hx & 0x000fffff) | 0x3fe00000;

					SET_HIGH_WORD(x, hx);

					/* m is odd.  Put x into [0.25,5) and increase m. */

					if (m & 1) {

					    x /= 2;

					    m += 1;

					}

					m = -(m >> 1);			/* Prepare for 2^(-m/2). */

					y = 1 / sqrt(x);	/* ~52-bit estimate. */

					h = y / 2;

					/*

					 * For values of x with a representation of 0x1.ffffffffffffepN

					 * with N an odd integer, the computed rsqrt() is not correctly

					 * rounded in round-to-nearest without toggling the rounding mode

					 * to FE_TOWARDZERO.  Note, FE_DOWNWARD also works.  However,

					 * messing with the rounding mode is expensive, so only do it

					 * when necessary. Example, x = 3.9999999999999991

					 * gives y --> hx = 0x3ff00000, lx = 0x00000001

					 */

					EXTRACT_WORDS(hx, lx, y);

					if ((hx & 0x000fffff) == 0 && lx == 1) {

					    rnd = fegetround();

					    fesetround(FE_TOWARDZERO);

					    _MUL(x, y, zh, zl);

					    _XMUL(zh, zl, h, 0, ph, pl);

					    fesetround(rnd);

					} else {

					    _MUL(x, y, zh, zl);

					    _XMUL(zh, zl, h, 0, ph, pl);

					}

					_XADD(-ph, -pl, half, 0, rh, rl);

					y = rh * h + h;

					ux = (m + 1024) << 20;

					INSERT_WORDS(x, ux, 0);

					return (y *= x);

				}

				#if LDBL_MANT_DIG == 53

				__weak_reference(rsqrt, rsqrtl);

				#endif

									
										lib/msun/src/s_rsqrtf.c
									
		+155
		
												View File
												
				@@ -0,0 +1,155 @@

				/*-

				 * Copyright (c) 2026 Steven G. Kargl

				 * All rights reserved.

				 *

				 * Redistribution and use in source and binary forms, with or without

				 * modification, are permitted provided that the following conditions

				 * are met:

				 * 1. Redistributions of source code must retain the above copyright

				 *    notice unmodified, this list of conditions, and the following

				 *    disclaimer.

				 * 2. Redistributions in binary form must reproduce the above copyright

				 *    notice, this list of conditions and the following disclaimer in the

				 *    documentation and/or other materials provided with the distribution.

				 *

				 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR

				 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES

				 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.

				 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,

				 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT

				 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

				 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

				 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

				 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF

				 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

				 */

				/**

				 * Compute the inverse sqrt of x, i.e., rsqrt(x) = 1 / sqrt(x).

				 *

				 * First, filter out special cases:

				 *

				 *   1. rsqrt(+-0) = +-inf, and raise FE_DIVBYZERO exception.

				 *   2. rsqrt(nan) = NaN.

				 *   3. rsqrt(+inf) returns +0.

				 *   2. rsqrt(x<0) = NaN, and raises FE_INVALID.

				 *

				 * If x is a subnormal, scale x into the normal range by x*0x1pN; while

				 * recording the exponent of the scale factor N.  Split the possibly

				 * scaled x into f*2^n with f in [0.5,1).  Set m=n or m=n-N (subnormal).

				 * If n is odd, then set f = f/2 and increase n to n+1.  Thus, f is

				 * in [0.25,1) with n even.

				 *

				 * An initial estimate of y = rqrt[f](x) is 1 / sqrt[f](x).  Exhaustive

				 * testing of rsqrtf() gave a max ULP of 1.49; while testing 500M x in

				 * [0,1000] gave a max ULP of 1.24 for rsqrt().  The value of y is then

				 * used with one iteration of Goldschmidt's algorithm:

				 *

				 *	z = x * y

				 *	h = y / 2

				 *	r = 0.5 - h * z

				 *	y = h * r + h

				 *

				 * A factor of 2 appears missing in the above, but it is included in the

				 * exponent m.

				 */

				#include <fenv.h>

				#include <float.h>

				#include "math.h"

				#include "math_private.h"

				#pragma STDC FENV_ACCESS ON

				#ifdef _CC

				#undef _CC

				#endif

				#define _CC (0x1p12F + 1)

				float

				rsqrtf(float x)

				{

					volatile static const float vzero = 0;

					static const float half = 0.5;

					uint32_t ix, ux;

					int m, rnd;

					float h, ph, pl, rh, rl, y, zh, zl;

					GET_FLOAT_WORD(ix, x);

					ux = ix & 0x7fffffff;

					/* x = +-0.  Raise exception. */

					if (ux == 0)

					    return (1 / x);

					/* x is NaN. */

					if (ux > 0x7f800000)

					    return (x + x);

					/* x is +-inf. */

					if (ux == 0x7f800000)

					    return (ix & 0x80000000 ? vzero / vzero : 0.F);

					/* x < 0.  Raise exception. */

					if (ix & 0x80000000)

					    return (vzero / vzero);

					/*

					 * If x is subnormal, then scale it into the normal range.

					 * Split x into significand and exponent, x = f * 2^m, with

					 * f in [0.5,1) and m a biased exponent.

					 */

					m = 0;

					if (ux < 0x00800000) {		/* Subnormal */

					    x *= 0x1p25f;

					    GET_FLOAT_WORD(ix, x);

					    m = -25;

					}

					m += (ix >> 23) - 126;		/* Unbiased exponent */

					ix = (ix & 0x007fffff) | 0x3f000000;

					SET_FLOAT_WORD(x, ix);		/* x is in [0.5,1). */

					/* m is odd.  Put x into [0.25,5) and increase m. */

					if (m & 1) {

					    x /= 2;

					    m += 1;

					}

					m = -(m >> 1);		/* Prepare for 2^(-m/2). */

					/*

					 * Exhaustive testing of rsqrtf(x) = 1 / sqrtf(x) with x in

					 * [0x1p-127,0x1p126] shows the this approximation gives a

					 * 22- to 23-bit estimate of rsqrt(f).  This is equivalent to

					 * a max ulp of ~1.49.

					 */

					y = 1 / sqrtf(x);

					h = y / 2;

					/*

					 * For values of x with a representation of 0x1.fffffcpN with

					 * N an odd integer, the computed rsqrtf() is not correctly

					 * rounded in round-to-nearest without toggling the rounding

					 * mode  to FE_TOWARDZERO.  Note, FE_DOWNWARD also works.

					 * However, messing with the rounding mode is expensive, so

					 * only do it when necessary. Example, x = 0x1.fffffcp3 gives

					 * y --> 0x3f800001.

					 */

					GET_FLOAT_WORD(ix, y);

					if ((ix & 0x000fffff) == 1) {

					    rnd = fegetround();

					    fesetround(FE_TOWARDZERO);

					    _MUL(x, y, zh, zl);

					    _XMUL(zh, zl, h, 0, ph, pl);

					   fesetround(rnd);

					} else {

					    _MUL(x, y, zh, zl);

					    _XMUL(zh, zl, h, 0, ph, pl);

					}

					_XADD(-ph, -pl, half, 0, rh, rl);

					y = h * rh + h;

					ix = (uint32_t)(m + 128) << 23;

					SET_FLOAT_WORD(x, ix);

					return (y *= x);

				}

									
										lib/msun/src/s_rsqrtl.c
									
		+203
		
												View File
												
				@@ -0,0 +1,203 @@

				/*-

				 * Copyright (c) 2026 Steven G. Kargl

				 * All rights reserved.

				 *

				 * Redistribution and use in source and binary forms, with or without

				 * modification, are permitted provided that the following conditions

				 * are met:

				 * 1. Redistributions of source code must retain the above copyright

				 *    notice unmodified, this list of conditions, and the following

				 *    disclaimer.

				 * 2. Redistributions in binary form must reproduce the above copyright

				 *    notice, this list of conditions and the following disclaimer in the

				 *    documentation and/or other materials provided with the distribution.

				 *

				 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR

				 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES

				 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.

				 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,

				 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT

				 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

				 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

				 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

				 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF

				 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

				 */

				/**

				 * Compute the inverse sqrt of x, i.e., rsqrt(x) = 1 / sqrt(x).

				 *

				 * First, filter out special cases:

				 *

				 *   1. rsqrt(+-0) = +-inf, and raise FE_DIVBYZERO exception.

				 *   2. rsqrt(nan) = NaN.

				 *   3. rsqrt(+inf) returns +0.

				 *   2. rsqrt(x<0) = NaN, and raises FE_INVALID.

				 *

				 * If x is a subnormal, scale x into the normal range by x*0x1pN; while

				 * recording the exponent of the scale factor N.  Split the possibly

				 * scaled x into f*2^n with f in [0.5,1).  Set m=n or m=n-N (subnormal).

				 * If n is odd, then set f = f/2 and increase n to n+1.  Thus, f is

				 * in [0.25,1) with n even.

				 *

				 * An initial estimate of y = rqrt[f](x) is 1 / sqrt[f](x).  Exhaustive

				 * testing of rsqrtf() gave a max ULP of 1.49; while testing 500M x in

				 * [0,1000] gave a max ULP of 1.24 for rsqrt().  The value of y is then

				 * used with one iteration of Goldschmidt's algorithm:

				 *

				 *	z = x * y

				 *	h = y / 2

				 *	r = 0.5 - h * z

				 *	y = h * r + h

				 *

				 * A factor of 2 appears missing in the above, but it is included in the

				 * exponent m.

				 */

				#include <fenv.h>

				#include <float.h>

				#include "math.h"

				#include "math_private.h"

				#include "fpmath.h"

				#pragma STDC FENV_ACCESS ON

				#if LDBL_MANT_DIG == 64

				#ifdef _CC

				#undef _CC

				#endif

				#define _CC (0x1p32L + 1)

				long double

				rsqrtl(long double x)

				{

					volatile static const double vzero = 0;

					static const double half = 0.5;

					uint32_t ux;

					int m, rnd;

					long double h, ph, pl, rh, rl, y, zh, zl;

					union IEEEl2bits u;

					u.e = x;

					ux = (u.bits.manl | u.bits.manh);

					/* x = +-0.  Raise exception. */

					if ((u.bits.exp | ux) == 0)

					    return (1 / x);

					/* x is NaN or x is +-inf. */

					if (u.bits.exp == 0x7fff)

					    return (ux ? (x + x) : (u.bits.sign ? vzero / vzero : 0));

					/* x < 0.  Raise exception. */

					if (u.bits.sign)

					    return (vzero / vzero);

					/*

					 * If x is subnormal, then scale it into the normal range.

					 * Split x into significand and exponent, x = f * 2^m, with

					 * f in [0.5,1) and m a biased exponent.

					 */

					ENTERI();

					if (u.bits.exp == 0) {		/* Subnormal */

					    u.e *= 0x1p512;

					    m = u.bits.exp - 0x41fe;

					} else {

					    m = u.bits.exp - 0x3ffe;

					}

					u.bits.exp = 0x3ffe;

					/* m is odd.  Put x into [0.25,5) and increase m. */

					if (m & 1) {

					    u.e /= 2;

					    m += 1;

					}

					m = -(m >> 1);			/* Prepare for 2^(-m/2). */

					y = 1 / sqrt((double)u.e);	/* ~52-bit estimate. */

					y -= y * (u.e * y * y - 1) / 2;	/* ~63-bit estimate. */

					h = y / 2;

					_MUL(u.e, y, zh, zl);

					_XMUL(zh, zl, h, 0, ph, pl);

					_XADD(-ph, -pl, half, 0, rh, rl);

					y = rh * h + h;

					u.e = 1;

					u.xbits.expsign = 0x3fff + m + 1;

					RETURNI(y * u.e);

				}

				#else

				#ifdef _CC

				#undef _CC

				#endif

				#define _CC (0x1p57L + 1)

				long double

				rsqrtl(long double x)

				{

					volatile static const double vzero = 0;

					int hx, m, rnd;

					long double y;

					/* x = +-0.  Raise exception. */

					if (x == 0)

					    return (1 / x);

					/* x is NaN. */

					if (isnan(x))

					    return (x + x);

					/* x is +-inf. */

					if (isinf(x))

					    return (x > 0 ? 0 : vzero / vzero);

					/* x < 0.  Raise exception. */

					if (x < 0)

					    return (vzero / vzero);

					/*

					 * If x is subnormal, then scale it into the normal range.

					 * Split x into significand and exponent, x = f * 2^m, with

					 * f in [0.5,1) and m a biased exponent.

					 */

					m = 0;

					if (!isnormal(x)) {

					    x *= 0x1p114L;

					    m = -114;

					}

					x = frexpl(x, &hx);

					m += hx;

					/* m is odd.  Put x into [0.25,5) and increase m. */

					if (m & 1) {

					    x /= 2;

					    m += 1;

					}

					m = -(m >> 1);			/* Prepare for 2^(-m/2). */

					y = 1 / sqrt((double)x);	/* ~52-bit estimate. */

					y -= y * (x * y * y - 1) / 2;	/* ~104-bit estimate. */

					static const double half = 0.5;

					long double h, ph, pl, rh, rl, zh, zl;

					h = y / 2;

					rnd = fegetround();

					fesetround(FE_TOWARDZERO);

					_MUL(x, y, zh, zl);

					_XMUL(zh, zl, -h, 0, ph, pl);

					fesetround(rnd);

					_XADD(ph, pl, half, 0, rh, rl);

					y = rh * h + h;

					m++;

					RETURNI(ldexpl(y, m));

				}

				#endif