Merge llvm-project release/18.x llvmorg-18.1.3-0-gc13b7485b879

This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvm-project release/18.x llvmorg-18.1.3-0-gc13b7485b879.

PR:		276104
MFC after:	1 month
This commit is contained in:
Dimitry Andric
2024-04-06 00:02:56 +02:00
107 changed files with 1268 additions and 433 deletions
@@ -673,6 +673,16 @@ class alignas(8) Decl {
/// fragment. See [module.global.frag]p3,4 for details. /// fragment. See [module.global.frag]p3,4 for details.
bool isDiscardedInGlobalModuleFragment() const { return false; } bool isDiscardedInGlobalModuleFragment() const { return false; }
/// Check whether ODR hash checking should be skipped for this declaration.
///
/// The existing ODRHash mechanism does not seem to be stable enough: the
/// false positive ODR violation reports are annoying, and we rarely see
/// true ODR violation reports. We also learned that MSVC disabled ODR checks
/// for declarations in the global module fragment (GMF). So we try to
/// disable ODR checks in the GMF to give a better user experience until the
/// ODR violation checks are stable enough.
bool shouldSkipCheckingODR() const;
/// Return true if this declaration has an attribute which acts as /// Return true if this declaration has an attribute which acts as
/// definition of the entity, such as 'alias' or 'ifunc'. /// definition of the entity, such as 'alias' or 'ifunc'.
bool hasDefiningAttr() const; bool hasDefiningAttr() const;
@@ -925,8 +925,8 @@ class LambdaScopeInfo final :
/// that were defined in parent contexts. Used to avoid warnings when the /// that were defined in parent contexts. Used to avoid warnings when the
/// shadowed variables are uncaptured by this lambda. /// shadowed variables are uncaptured by this lambda.
struct ShadowedOuterDecl { struct ShadowedOuterDecl {
const VarDecl *VD; const NamedDecl *VD;
const VarDecl *ShadowedDecl; const NamedDecl *ShadowedDecl;
}; };
llvm::SmallVector<ShadowedOuterDecl, 4> ShadowingDecls; llvm::SmallVector<ShadowedOuterDecl, 4> ShadowingDecls;
@@ -2451,13 +2451,6 @@ class BitsUnpacker {
uint32_t Value; uint32_t Value;
uint32_t CurrentBitsIndex = ~0; uint32_t CurrentBitsIndex = ~0;
}; };
/// Returns true when ODR checking should be skipped for \p D: the declaration
/// has an owning module, the SkipODRCheckInGMF language option is set, and
/// that owning module is an explicit global module.
inline bool shouldSkipCheckingODR(const Decl *D) {
  const auto *Owner = D->getOwningModule();
  // Without an owning module there is nothing to skip.
  if (!Owner)
    return false;
  if (!D->getASTContext().getLangOpts().SkipODRCheckInGMF)
    return false;
  return Owner->isExplicitGlobalModule();
}
} // namespace clang } // namespace clang
#endif // LLVM_CLANG_SERIALIZATION_ASTREADER_H #endif // LLVM_CLANG_SERIALIZATION_ASTREADER_H
+1 -1
View File
@@ -4476,7 +4476,7 @@ unsigned FunctionDecl::getODRHash() {
} }
class ODRHash Hash; class ODRHash Hash;
Hash.AddFunctionDecl(this); Hash.AddFunctionDecl(this, /*SkipBody=*/shouldSkipCheckingODR());
setHasODRHash(true); setHasODRHash(true);
ODRHash = Hash.CalculateHash(); ODRHash = Hash.CalculateHash();
return ODRHash; return ODRHash;
@@ -1102,6 +1102,11 @@ bool Decl::isInAnotherModuleUnit() const {
return M != getASTContext().getCurrentNamedModule(); return M != getASTContext().getCurrentNamedModule();
} }
// ODR checking is skipped only when the SkipODRCheckInGMF language option is
// set and this declaration is owned by an explicit global module.
bool Decl::shouldSkipCheckingODR() const {
  if (!getASTContext().getLangOpts().SkipODRCheckInGMF)
    return false;
  const auto *Owner = getOwningModule();
  return Owner && Owner->isExplicitGlobalModule();
}
static Decl::Kind getKind(const Decl *D) { return D->getKind(); } static Decl::Kind getKind(const Decl *D) { return D->getKind(); }
static Decl::Kind getKind(const DeclContext *DC) { return DC->getDeclKind(); } static Decl::Kind getKind(const DeclContext *DC) { return DC->getDeclKind(); }
@@ -301,10 +301,9 @@ bool Module::directlyUses(const Module *Requested) {
if (Requested->isSubModuleOf(Use)) if (Requested->isSubModuleOf(Use))
return true; return true;
// Anyone is allowed to use our builtin stdarg.h and stddef.h and their // Anyone is allowed to use our builtin stddef.h and its accompanying modules.
// accompanying modules. if (Requested->fullModuleNameIs({"_Builtin_stddef", "max_align_t"}) ||
if (Requested->getTopLevelModuleName() == "_Builtin_stdarg" || Requested->fullModuleNameIs({"_Builtin_stddef_wint_t"}))
Requested->getTopLevelModuleName() == "_Builtin_stddef")
return true; return true;
if (NoUndeclaredIncludes) if (NoUndeclaredIncludes)
@@ -186,6 +186,14 @@ class EmitAssemblyHelper {
TargetTriple.getVendor() != llvm::Triple::Apple; TargetTriple.getVendor() != llvm::Triple::Apple;
} }
/// Decide whether the UnifiedLTO module flag should be emitted.
/// The flag should be set when UnifiedLTO is enabled and the module is
/// either being prepared for ThinLTO or is a regular (full) LTO module that
/// carries a summary.
/// NOTE: the spelling "Modue" in the name is kept as-is because the call
/// site uses it.
bool shouldEmitUnifiedLTOModueFlag() const {
  if (!CodeGenOpts.UnifiedLTO)
    return false;
  return CodeGenOpts.PrepareForThinLTO || shouldEmitRegularLTOSummary();
}
public: public:
EmitAssemblyHelper(DiagnosticsEngine &_Diags, EmitAssemblyHelper(DiagnosticsEngine &_Diags,
const HeaderSearchOptions &HeaderSearchOpts, const HeaderSearchOptions &HeaderSearchOpts,
@@ -1029,7 +1037,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
if (!actionRequiresCodeGen(Action) && CodeGenOpts.VerifyModule) if (!actionRequiresCodeGen(Action) && CodeGenOpts.VerifyModule)
MPM.addPass(VerifierPass()); MPM.addPass(VerifierPass());
if (Action == Backend_EmitBC || Action == Backend_EmitLL) { if (Action == Backend_EmitBC || Action == Backend_EmitLL ||
CodeGenOpts.FatLTO) {
if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) { if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) {
if (!TheModule->getModuleFlag("EnableSplitLTOUnit")) if (!TheModule->getModuleFlag("EnableSplitLTOUnit"))
TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit", TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit",
@@ -1040,11 +1049,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
if (!ThinLinkOS) if (!ThinLinkOS)
return; return;
} }
if (CodeGenOpts.UnifiedLTO)
TheModule->addModuleFlag(llvm::Module::Error, "UnifiedLTO", uint32_t(1));
MPM.addPass(ThinLTOBitcodeWriterPass( MPM.addPass(ThinLTOBitcodeWriterPass(
*OS, ThinLinkOS ? &ThinLinkOS->os() : nullptr)); *OS, ThinLinkOS ? &ThinLinkOS->os() : nullptr));
} else { } else if (Action == Backend_EmitLL) {
MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists, MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists,
/*EmitLTOSummary=*/true)); /*EmitLTOSummary=*/true));
} }
@@ -1058,24 +1065,17 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
if (!TheModule->getModuleFlag("EnableSplitLTOUnit")) if (!TheModule->getModuleFlag("EnableSplitLTOUnit"))
TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit", TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit",
uint32_t(1)); uint32_t(1));
if (CodeGenOpts.UnifiedLTO)
TheModule->addModuleFlag(llvm::Module::Error, "UnifiedLTO", uint32_t(1));
} }
if (Action == Backend_EmitBC) if (Action == Backend_EmitBC) {
MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists,
EmitLTOSummary)); EmitLTOSummary));
else } else if (Action == Backend_EmitLL) {
MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists, MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists,
EmitLTOSummary)); EmitLTOSummary));
} }
} }
if (CodeGenOpts.FatLTO) {
// Set the EnableSplitLTOUnit and UnifiedLTO module flags, since FatLTO if (shouldEmitUnifiedLTOModueFlag())
// uses a different action than Backend_EmitBC or Backend_EmitLL.
if (!TheModule->getModuleFlag("EnableSplitLTOUnit"))
TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit",
uint32_t(CodeGenOpts.EnableSplitLTOUnit));
if (CodeGenOpts.UnifiedLTO && !TheModule->getModuleFlag("UnifiedLTO"))
TheModule->addModuleFlag(llvm::Module::Error, "UnifiedLTO", uint32_t(1)); TheModule->addModuleFlag(llvm::Module::Error, "UnifiedLTO", uint32_t(1));
} }
@@ -1241,20 +1241,30 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D,
return; return;
} }
// If the initializer is small, use a handful of stores. // If the initializer is small or trivialAutoVarInit is set, use a handful of
// stores.
bool IsTrivialAutoVarInitPattern =
CGM.getContext().getLangOpts().getTrivialAutoVarInit() ==
LangOptions::TrivialAutoVarInitKind::Pattern;
if (shouldSplitConstantStore(CGM, ConstantSize)) { if (shouldSplitConstantStore(CGM, ConstantSize)) {
if (auto *STy = dyn_cast<llvm::StructType>(Ty)) { if (auto *STy = dyn_cast<llvm::StructType>(Ty)) {
if (STy == Loc.getElementType() ||
(STy != Loc.getElementType() && IsTrivialAutoVarInitPattern)) {
const llvm::StructLayout *Layout = const llvm::StructLayout *Layout =
CGM.getDataLayout().getStructLayout(STy); CGM.getDataLayout().getStructLayout(STy);
for (unsigned i = 0; i != constant->getNumOperands(); i++) { for (unsigned i = 0; i != constant->getNumOperands(); i++) {
CharUnits CurOff = CharUnits::fromQuantity(Layout->getElementOffset(i)); CharUnits CurOff =
CharUnits::fromQuantity(Layout->getElementOffset(i));
Address EltPtr = Builder.CreateConstInBoundsByteGEP( Address EltPtr = Builder.CreateConstInBoundsByteGEP(
Loc.withElementType(CGM.Int8Ty), CurOff); Loc.withElementType(CGM.Int8Ty), CurOff);
emitStoresForConstant(CGM, D, EltPtr, isVolatile, Builder, emitStoresForConstant(CGM, D, EltPtr, isVolatile, Builder,
constant->getAggregateElement(i), IsAutoInit); constant->getAggregateElement(i), IsAutoInit);
} }
return; return;
}
} else if (auto *ATy = dyn_cast<llvm::ArrayType>(Ty)) { } else if (auto *ATy = dyn_cast<llvm::ArrayType>(Ty)) {
if (ATy == Loc.getElementType() ||
(ATy != Loc.getElementType() && IsTrivialAutoVarInitPattern)) {
for (unsigned i = 0; i != ATy->getNumElements(); i++) { for (unsigned i = 0; i != ATy->getNumElements(); i++) {
Address EltPtr = Builder.CreateConstGEP( Address EltPtr = Builder.CreateConstGEP(
Loc.withElementType(ATy->getElementType()), i); Loc.withElementType(ATy->getElementType()), i);
@@ -1264,6 +1274,7 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D,
return; return;
} }
} }
}
// Copy from a global. // Copy from a global.
auto *I = auto *I =
@@ -44,8 +44,15 @@ std::string wasm::Linker::getLinkerPath(const ArgList &Args) const {
llvm::sys::fs::can_execute(UseLinker)) llvm::sys::fs::can_execute(UseLinker))
return std::string(UseLinker); return std::string(UseLinker);
// Accept 'lld', and 'ld' as aliases for the default linker // Interpret 'lld' as explicitly requesting `wasm-ld`, so look for that
if (UseLinker != "lld" && UseLinker != "ld") // linker. Note that for `wasm32-wasip2` this overrides the default linker
// of `wasm-component-ld`.
if (UseLinker == "lld") {
return ToolChain.GetProgramPath("wasm-ld");
}
// Allow 'ld' as an alias for the default linker
if (UseLinker != "ld")
ToolChain.getDriver().Diag(diag::err_drv_invalid_linker_name) ToolChain.getDriver().Diag(diag::err_drv_invalid_linker_name)
<< A->getAsString(Args); << A->getAsString(Args);
} }
@@ -73,6 +80,16 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA,
if (Args.hasArg(options::OPT_s)) if (Args.hasArg(options::OPT_s))
CmdArgs.push_back("--strip-all"); CmdArgs.push_back("--strip-all");
// On `wasip2` the default linker is `wasm-component-ld`, which wraps the
// execution of `wasm-ld`. Locate `wasm-ld` here and pass its path
// explicitly so that the wrapper does not have to search `PATH` or
// otherwise rediscover where it is.
if (llvm::sys::path::stem(Linker).ends_with_insensitive(
"wasm-component-ld")) {
CmdArgs.push_back("--wasm-ld-path");
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetProgramPath("wasm-ld")));
}
Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_u}); Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_u});
ToolChain.AddFilePathLibArgs(Args, CmdArgs); ToolChain.AddFilePathLibArgs(Args, CmdArgs);
@@ -221,6 +238,12 @@ WebAssembly::WebAssembly(const Driver &D, const llvm::Triple &Triple,
} }
} }
// The default linker is `wasm-component-ld` when the OS component is
// "wasip2", and `wasm-ld` for every other OS.
const char *WebAssembly::getDefaultLinker() const {
  const bool IsWasip2 = getOS() == "wasip2";
  return IsWasip2 ? "wasm-component-ld" : "wasm-ld";
}
bool WebAssembly::IsMathErrnoDefault() const { return false; } bool WebAssembly::IsMathErrnoDefault() const { return false; }
bool WebAssembly::IsObjCNonFragileABIDefault() const { return true; } bool WebAssembly::IsObjCNonFragileABIDefault() const { return true; }
@@ -67,7 +67,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssembly final : public ToolChain {
llvm::opt::ArgStringList &CmdArgs) const override; llvm::opt::ArgStringList &CmdArgs) const override;
SanitizerMask getSupportedSanitizers() const override; SanitizerMask getSupportedSanitizers() const override;
const char *getDefaultLinker() const override { return "wasm-ld"; } const char *getDefaultLinker() const override;
CXXStdlibType GetDefaultCXXStdlibType() const override { CXXStdlibType GetDefaultCXXStdlibType() const override {
return ToolChain::CST_Libcxx; return ToolChain::CST_Libcxx;
@@ -7,7 +7,7 @@
*===-----------------------------------------------------------------------=== *===-----------------------------------------------------------------------===
*/ */
#if !defined(NULL) || !__has_feature(modules) #if !defined(NULL) || !__building_module(_Builtin_stddef)
/* linux/stddef.h will define NULL to 0. glibc (and other) headers then define /* linux/stddef.h will define NULL to 0. glibc (and other) headers then define
* __need_NULL and rely on stddef.h to redefine NULL to the correct value again. * __need_NULL and rely on stddef.h to redefine NULL to the correct value again.
@@ -7,7 +7,12 @@
*===-----------------------------------------------------------------------=== *===-----------------------------------------------------------------------===
*/ */
#ifndef _NULLPTR_T /*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(_NULLPTR_T) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define _NULLPTR_T #define _NULLPTR_T
#ifdef __cplusplus #ifdef __cplusplus
@@ -7,6 +7,11 @@
*===-----------------------------------------------------------------------=== *===-----------------------------------------------------------------------===
*/ */
#ifndef offsetof /*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(offsetof) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define offsetof(t, d) __builtin_offsetof(t, d) #define offsetof(t, d) __builtin_offsetof(t, d)
#endif #endif
@@ -7,7 +7,12 @@
*===-----------------------------------------------------------------------=== *===-----------------------------------------------------------------------===
*/ */
#ifndef _PTRDIFF_T /*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(_PTRDIFF_T) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define _PTRDIFF_T #define _PTRDIFF_T
typedef __PTRDIFF_TYPE__ ptrdiff_t; typedef __PTRDIFF_TYPE__ ptrdiff_t;
@@ -7,7 +7,12 @@
*===-----------------------------------------------------------------------=== *===-----------------------------------------------------------------------===
*/ */
#ifndef _RSIZE_T /*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(_RSIZE_T) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define _RSIZE_T #define _RSIZE_T
typedef __SIZE_TYPE__ rsize_t; typedef __SIZE_TYPE__ rsize_t;
@@ -7,7 +7,12 @@
*===-----------------------------------------------------------------------=== *===-----------------------------------------------------------------------===
*/ */
#ifndef _SIZE_T /*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(_SIZE_T) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define _SIZE_T #define _SIZE_T
typedef __SIZE_TYPE__ size_t; typedef __SIZE_TYPE__ size_t;
@@ -7,6 +7,11 @@
*===-----------------------------------------------------------------------=== *===-----------------------------------------------------------------------===
*/ */
#ifndef unreachable /*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(unreachable) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define unreachable() __builtin_unreachable() #define unreachable() __builtin_unreachable()
#endif #endif
@@ -9,7 +9,12 @@
#if !defined(__cplusplus) || (defined(_MSC_VER) && !_NATIVE_WCHAR_T_DEFINED) #if !defined(__cplusplus) || (defined(_MSC_VER) && !_NATIVE_WCHAR_T_DEFINED)
#ifndef _WCHAR_T /*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(_WCHAR_T) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define _WCHAR_T #define _WCHAR_T
#ifdef _MSC_EXTENSIONS #ifdef _MSC_EXTENSIONS
@@ -156,7 +156,7 @@ extern __inline unsigned char
return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1);
} }
extern __inline unsigned char extern __inline unsigned short
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__iocsrrd_h(unsigned int _1) { __iocsrrd_h(unsigned int _1) {
return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1);
@@ -155,9 +155,9 @@ module _Builtin_intrinsics [system] [extern_c] {
// Start -fbuiltin-headers-in-system-modules affected modules // Start -fbuiltin-headers-in-system-modules affected modules
// The following modules all ignore their top level headers // The following modules all ignore their headers when
// when -fbuiltin-headers-in-system-modules is passed, and // -fbuiltin-headers-in-system-modules is passed, and many of
// most of those headers join system modules when present. // those headers join system modules when present.
// e.g. if -fbuiltin-headers-in-system-modules is passed, then // e.g. if -fbuiltin-headers-in-system-modules is passed, then
// float.h will not be in the _Builtin_float module (that module // float.h will not be in the _Builtin_float module (that module
@@ -190,11 +190,6 @@ module _Builtin_stdalign [system] {
export * export *
} }
// When -fbuiltin-headers-in-system-modules is passed, only
// the top level headers are removed, the implementation headers
// will always be in their submodules. That means when stdarg.h
// is included, it will still import this module and make the
// appropriate submodules visible.
module _Builtin_stdarg [system] { module _Builtin_stdarg [system] {
textual header "stdarg.h" textual header "stdarg.h"
@@ -237,6 +232,8 @@ module _Builtin_stdbool [system] {
module _Builtin_stddef [system] { module _Builtin_stddef [system] {
textual header "stddef.h" textual header "stddef.h"
// __stddef_max_align_t.h is always in this module, even if
// -fbuiltin-headers-in-system-modules is passed.
explicit module max_align_t { explicit module max_align_t {
header "__stddef_max_align_t.h" header "__stddef_max_align_t.h"
export * export *
@@ -283,9 +280,10 @@ module _Builtin_stddef [system] {
} }
} }
/* wint_t is provided by <wchar.h> and not <stddef.h>. It's here // wint_t is provided by <wchar.h> and not <stddef.h>. It's here
* for compatibility, but must be explicitly requested. Therefore // for compatibility, but must be explicitly requested. Therefore
* __stddef_wint_t.h is not part of _Builtin_stddef. */ // __stddef_wint_t.h is not part of _Builtin_stddef. It is always in
// this module even if -fbuiltin-headers-in-system-modules is passed.
module _Builtin_stddef_wint_t [system] { module _Builtin_stddef_wint_t [system] {
header "__stddef_wint_t.h" header "__stddef_wint_t.h"
export * export *
@@ -2498,9 +2498,12 @@ void ModuleMapParser::parseHeaderDecl(MMToken::TokenKind LeadingToken,
} }
bool NeedsFramework = false; bool NeedsFramework = false;
// Don't add the top level headers to the builtin modules if the builtin headers // Don't add headers to the builtin modules if the builtin headers belong to
// belong to the system modules. // the system modules, with the exception of __stddef_max_align_t.h which
if (!Map.LangOpts.BuiltinHeadersInSystemModules || ActiveModule->isSubModule() || !isBuiltInModuleName(ActiveModule->Name)) // always had its own module.
if (!Map.LangOpts.BuiltinHeadersInSystemModules ||
!isBuiltInModuleName(ActiveModule->getTopLevelModuleName()) ||
ActiveModule->fullModuleNameIs({"_Builtin_stddef", "max_align_t"}))
Map.addUnresolvedHeader(ActiveModule, std::move(Header), NeedsFramework); Map.addUnresolvedHeader(ActiveModule, std::move(Header), NeedsFramework);
if (NeedsFramework) if (NeedsFramework)
@@ -8396,28 +8396,40 @@ void Sema::CheckShadow(NamedDecl *D, NamedDecl *ShadowedDecl,
unsigned WarningDiag = diag::warn_decl_shadow; unsigned WarningDiag = diag::warn_decl_shadow;
SourceLocation CaptureLoc; SourceLocation CaptureLoc;
if (isa<VarDecl>(D) && isa<VarDecl>(ShadowedDecl) && NewDC && if (isa<VarDecl>(D) && NewDC && isa<CXXMethodDecl>(NewDC)) {
isa<CXXMethodDecl>(NewDC)) {
if (const auto *RD = dyn_cast<CXXRecordDecl>(NewDC->getParent())) { if (const auto *RD = dyn_cast<CXXRecordDecl>(NewDC->getParent())) {
if (RD->isLambda() && OldDC->Encloses(NewDC->getLexicalParent())) { if (RD->isLambda() && OldDC->Encloses(NewDC->getLexicalParent())) {
if (RD->getLambdaCaptureDefault() == LCD_None) { if (const auto *VD = dyn_cast<VarDecl>(ShadowedDecl)) {
// Try to avoid warnings for lambdas with an explicit capture list.
const auto *LSI = cast<LambdaScopeInfo>(getCurFunction()); const auto *LSI = cast<LambdaScopeInfo>(getCurFunction());
// Warn only when the lambda captures the shadowed decl explicitly. if (RD->getLambdaCaptureDefault() == LCD_None) {
CaptureLoc = getCaptureLocation(LSI, cast<VarDecl>(ShadowedDecl)); // Try to avoid warnings for lambdas with an explicit capture
// list. Warn only when the lambda captures the shadowed decl
// explicitly.
CaptureLoc = getCaptureLocation(LSI, VD);
if (CaptureLoc.isInvalid()) if (CaptureLoc.isInvalid())
WarningDiag = diag::warn_decl_shadow_uncaptured_local; WarningDiag = diag::warn_decl_shadow_uncaptured_local;
} else { } else {
// Remember that this was shadowed so we can avoid the warning if the // Remember that this was shadowed so we can avoid the warning if
// shadowed decl isn't captured and the warning settings allow it. // the shadowed decl isn't captured and the warning settings allow
// it.
cast<LambdaScopeInfo>(getCurFunction()) cast<LambdaScopeInfo>(getCurFunction())
->ShadowingDecls.push_back( ->ShadowingDecls.push_back({D, VD});
{cast<VarDecl>(D), cast<VarDecl>(ShadowedDecl)});
return; return;
} }
} }
if (isa<FieldDecl>(ShadowedDecl)) {
if (cast<VarDecl>(ShadowedDecl)->hasLocalStorage()) { // If lambda can capture this, then emit default shadowing warning,
// Otherwise it is not really a shadowing case since field is not
// available in lambda's body.
// At this point we don't know that lambda can capture this, so
// remember that this was shadowed and delay until we know.
cast<LambdaScopeInfo>(getCurFunction())
->ShadowingDecls.push_back({D, ShadowedDecl});
return;
}
}
if (const auto *VD = dyn_cast<VarDecl>(ShadowedDecl);
VD && VD->hasLocalStorage()) {
// A variable can't shadow a local variable in an enclosing scope, if // A variable can't shadow a local variable in an enclosing scope, if
// they are separated by a non-capturing declaration context. // they are separated by a non-capturing declaration context.
for (DeclContext *ParentDC = NewDC; for (DeclContext *ParentDC = NewDC;
@@ -8468,19 +8480,28 @@ void Sema::CheckShadow(NamedDecl *D, NamedDecl *ShadowedDecl,
/// when these variables are captured by the lambda. /// when these variables are captured by the lambda.
void Sema::DiagnoseShadowingLambdaDecls(const LambdaScopeInfo *LSI) { void Sema::DiagnoseShadowingLambdaDecls(const LambdaScopeInfo *LSI) {
for (const auto &Shadow : LSI->ShadowingDecls) { for (const auto &Shadow : LSI->ShadowingDecls) {
const VarDecl *ShadowedDecl = Shadow.ShadowedDecl; const NamedDecl *ShadowedDecl = Shadow.ShadowedDecl;
// Try to avoid the warning when the shadowed decl isn't captured. // Try to avoid the warning when the shadowed decl isn't captured.
SourceLocation CaptureLoc = getCaptureLocation(LSI, ShadowedDecl);
const DeclContext *OldDC = ShadowedDecl->getDeclContext(); const DeclContext *OldDC = ShadowedDecl->getDeclContext();
Diag(Shadow.VD->getLocation(), CaptureLoc.isInvalid() if (const auto *VD = dyn_cast<VarDecl>(ShadowedDecl)) {
? diag::warn_decl_shadow_uncaptured_local SourceLocation CaptureLoc = getCaptureLocation(LSI, VD);
Diag(Shadow.VD->getLocation(),
CaptureLoc.isInvalid() ? diag::warn_decl_shadow_uncaptured_local
: diag::warn_decl_shadow) : diag::warn_decl_shadow)
<< Shadow.VD->getDeclName() << Shadow.VD->getDeclName()
<< computeShadowedDeclKind(ShadowedDecl, OldDC) << OldDC; << computeShadowedDeclKind(ShadowedDecl, OldDC) << OldDC;
if (!CaptureLoc.isInvalid()) if (CaptureLoc.isValid())
Diag(CaptureLoc, diag::note_var_explicitly_captured_here) Diag(CaptureLoc, diag::note_var_explicitly_captured_here)
<< Shadow.VD->getDeclName() << /*explicitly*/ 0; << Shadow.VD->getDeclName() << /*explicitly*/ 0;
Diag(ShadowedDecl->getLocation(), diag::note_previous_declaration); Diag(ShadowedDecl->getLocation(), diag::note_previous_declaration);
} else if (isa<FieldDecl>(ShadowedDecl)) {
Diag(Shadow.VD->getLocation(),
LSI->isCXXThisCaptured() ? diag::warn_decl_shadow
: diag::warn_decl_shadow_uncaptured_local)
<< Shadow.VD->getDeclName()
<< computeShadowedDeclKind(ShadowedDecl, OldDC) << OldDC;
Diag(ShadowedDecl->getLocation(), diag::note_previous_declaration);
}
} }
} }
@@ -14470,6 +14470,23 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
CurFPFeatureOverrides()); CurFPFeatureOverrides());
} }
// If this is the .* operator, which is not overloadable, just
// create a built-in binary operator.
if (Opc == BO_PtrMemD) {
auto CheckPlaceholder = [&](Expr *&Arg) {
ExprResult Res = CheckPlaceholderExpr(Arg);
if (Res.isUsable())
Arg = Res.get();
return !Res.isUsable();
};
// CreateBuiltinBinOp() doesn't like it if we tell it to create a '.*'
// expression that contains placeholders (in either the LHS or RHS).
if (CheckPlaceholder(Args[0]) || CheckPlaceholder(Args[1]))
return ExprError();
return CreateBuiltinBinOp(OpLoc, Opc, Args[0], Args[1]);
}
// Always do placeholder-like conversions on the RHS. // Always do placeholder-like conversions on the RHS.
if (checkPlaceholderForOverload(*this, Args[1])) if (checkPlaceholderForOverload(*this, Args[1]))
return ExprError(); return ExprError();
@@ -14489,11 +14506,6 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
if (Opc == BO_Assign && !Args[0]->getType()->isOverloadableType()) if (Opc == BO_Assign && !Args[0]->getType()->isOverloadableType())
return CreateBuiltinBinOp(OpLoc, Opc, Args[0], Args[1]); return CreateBuiltinBinOp(OpLoc, Opc, Args[0], Args[1]);
// If this is the .* operator, which is not overloadable, just
// create a built-in binary operator.
if (Opc == BO_PtrMemD)
return CreateBuiltinBinOp(OpLoc, Opc, Args[0], Args[1]);
// Build the overload set. // Build the overload set.
OverloadCandidateSet CandidateSet(OpLoc, OverloadCandidateSet::CSK_Operator, OverloadCandidateSet CandidateSet(OpLoc, OverloadCandidateSet::CSK_Operator,
OverloadCandidateSet::OperatorRewriteInfo( OverloadCandidateSet::OperatorRewriteInfo(
@@ -1830,7 +1830,27 @@ static TemplateParameterList *GetTemplateParameterList(TemplateDecl *TD) {
// Make sure we get the template parameter list from the most // Make sure we get the template parameter list from the most
// recent declaration, since that is the only one that is guaranteed to // recent declaration, since that is the only one that is guaranteed to
// have all the default template argument information. // have all the default template argument information.
return cast<TemplateDecl>(TD->getMostRecentDecl())->getTemplateParameters(); Decl *D = TD->getMostRecentDecl();
// C++11 [temp.param]p12:
// A default template argument shall not be specified in a friend class
// template declaration.
//
// Skip past friend *declarations* because they are not supposed to contain
// default template arguments. Moreover, these declarations may introduce
// template parameters living in different template depths than the
// corresponding template parameters in TD, causing unmatched constraint
// substitution.
//
// FIXME: Diagnose such cases within a class template:
// template <class T>
// struct S {
// template <class = void> friend struct C;
// };
// template struct S<int>;
while (D->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None &&
D->getPreviousDecl())
D = D->getPreviousDecl();
return cast<TemplateDecl>(D)->getTemplateParameters();
} }
DeclResult Sema::CheckClassTemplate( DeclResult Sema::CheckClassTemplate(
@@ -9745,7 +9745,7 @@ void ASTReader::finishPendingActions() {
!NonConstDefn->isLateTemplateParsed() && !NonConstDefn->isLateTemplateParsed() &&
// We only perform ODR checks for decls not in the explicit // We only perform ODR checks for decls not in the explicit
// global module fragment. // global module fragment.
!shouldSkipCheckingODR(FD) && !FD->shouldSkipCheckingODR() &&
FD->getODRHash() != NonConstDefn->getODRHash()) { FD->getODRHash() != NonConstDefn->getODRHash()) {
if (!isa<CXXMethodDecl>(FD)) { if (!isa<CXXMethodDecl>(FD)) {
PendingFunctionOdrMergeFailures[FD].push_back(NonConstDefn); PendingFunctionOdrMergeFailures[FD].push_back(NonConstDefn);
@@ -832,7 +832,7 @@ void ASTDeclReader::VisitEnumDecl(EnumDecl *ED) {
Reader.mergeDefinitionVisibility(OldDef, ED); Reader.mergeDefinitionVisibility(OldDef, ED);
// We don't want to check the ODR hash value for declarations from global // We don't want to check the ODR hash value for declarations from global
// module fragment. // module fragment.
if (!shouldSkipCheckingODR(ED) && if (!ED->shouldSkipCheckingODR() &&
OldDef->getODRHash() != ED->getODRHash()) OldDef->getODRHash() != ED->getODRHash())
Reader.PendingEnumOdrMergeFailures[OldDef].push_back(ED); Reader.PendingEnumOdrMergeFailures[OldDef].push_back(ED);
} else { } else {
@@ -874,7 +874,7 @@ void ASTDeclReader::VisitRecordDecl(RecordDecl *RD) {
VisitRecordDeclImpl(RD); VisitRecordDeclImpl(RD);
// We should only reach here if we're in C/Objective-C. There is no // We should only reach here if we're in C/Objective-C. There is no
// global module fragment. // global module fragment.
assert(!shouldSkipCheckingODR(RD)); assert(!RD->shouldSkipCheckingODR());
RD->setODRHash(Record.readInt()); RD->setODRHash(Record.readInt());
// Maintain the invariant of a redeclaration chain containing only // Maintain the invariant of a redeclaration chain containing only
@@ -2152,7 +2152,7 @@ void ASTDeclReader::MergeDefinitionData(
} }
// We don't want to check ODR for decls in the global module fragment. // We don't want to check ODR for decls in the global module fragment.
if (shouldSkipCheckingODR(MergeDD.Definition)) if (MergeDD.Definition->shouldSkipCheckingODR())
return; return;
if (D->getODRHash() != MergeDD.ODRHash) { if (D->getODRHash() != MergeDD.ODRHash) {
@@ -3526,7 +3526,7 @@ ASTDeclReader::FindExistingResult ASTDeclReader::findExisting(NamedDecl *D) {
// same template specialization into the same CXXRecordDecl. // same template specialization into the same CXXRecordDecl.
auto MergedDCIt = Reader.MergedDeclContexts.find(D->getLexicalDeclContext()); auto MergedDCIt = Reader.MergedDeclContexts.find(D->getLexicalDeclContext());
if (MergedDCIt != Reader.MergedDeclContexts.end() && if (MergedDCIt != Reader.MergedDeclContexts.end() &&
!shouldSkipCheckingODR(D) && MergedDCIt->second == D->getDeclContext()) !D->shouldSkipCheckingODR() && MergedDCIt->second == D->getDeclContext())
Reader.PendingOdrMergeChecks.push_back(D); Reader.PendingOdrMergeChecks.push_back(D);
return FindExistingResult(Reader, D, /*Existing=*/nullptr, return FindExistingResult(Reader, D, /*Existing=*/nullptr,
@@ -6010,7 +6010,7 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) {
BitsPacker DefinitionBits; BitsPacker DefinitionBits;
bool ShouldSkipCheckingODR = shouldSkipCheckingODR(D); bool ShouldSkipCheckingODR = D->shouldSkipCheckingODR();
DefinitionBits.addBit(ShouldSkipCheckingODR); DefinitionBits.addBit(ShouldSkipCheckingODR);
#define FIELD(Name, Width, Merge) \ #define FIELD(Name, Width, Merge) \
@@ -488,7 +488,7 @@ void ASTDeclWriter::VisitEnumDecl(EnumDecl *D) {
BitsPacker EnumDeclBits; BitsPacker EnumDeclBits;
EnumDeclBits.addBits(D->getNumPositiveBits(), /*BitWidth=*/8); EnumDeclBits.addBits(D->getNumPositiveBits(), /*BitWidth=*/8);
EnumDeclBits.addBits(D->getNumNegativeBits(), /*BitWidth=*/8); EnumDeclBits.addBits(D->getNumNegativeBits(), /*BitWidth=*/8);
bool ShouldSkipCheckingODR = shouldSkipCheckingODR(D); bool ShouldSkipCheckingODR = D->shouldSkipCheckingODR();
EnumDeclBits.addBit(ShouldSkipCheckingODR); EnumDeclBits.addBit(ShouldSkipCheckingODR);
EnumDeclBits.addBit(D->isScoped()); EnumDeclBits.addBit(D->isScoped());
EnumDeclBits.addBit(D->isScopedUsingClassTag()); EnumDeclBits.addBit(D->isScopedUsingClassTag());
@@ -514,7 +514,7 @@ void ASTDeclWriter::VisitEnumDecl(EnumDecl *D) {
!D->isTopLevelDeclInObjCContainer() && !D->isTopLevelDeclInObjCContainer() &&
!CXXRecordDecl::classofKind(D->getKind()) && !CXXRecordDecl::classofKind(D->getKind()) &&
!D->getIntegerTypeSourceInfo() && !D->getMemberSpecializationInfo() && !D->getIntegerTypeSourceInfo() && !D->getMemberSpecializationInfo() &&
!needsAnonymousDeclarationNumber(D) && !shouldSkipCheckingODR(D) && !needsAnonymousDeclarationNumber(D) && !D->shouldSkipCheckingODR() &&
D->getDeclName().getNameKind() == DeclarationName::Identifier) D->getDeclName().getNameKind() == DeclarationName::Identifier)
AbbrevToUse = Writer.getDeclEnumAbbrev(); AbbrevToUse = Writer.getDeclEnumAbbrev();
@@ -680,7 +680,7 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) {
// FIXME: stable encoding // FIXME: stable encoding
FunctionDeclBits.addBits(llvm::to_underlying(D->getLinkageInternal()), 3); FunctionDeclBits.addBits(llvm::to_underlying(D->getLinkageInternal()), 3);
FunctionDeclBits.addBits((uint32_t)D->getStorageClass(), /*BitWidth=*/3); FunctionDeclBits.addBits((uint32_t)D->getStorageClass(), /*BitWidth=*/3);
bool ShouldSkipCheckingODR = shouldSkipCheckingODR(D); bool ShouldSkipCheckingODR = D->shouldSkipCheckingODR();
FunctionDeclBits.addBit(ShouldSkipCheckingODR); FunctionDeclBits.addBit(ShouldSkipCheckingODR);
FunctionDeclBits.addBit(D->isInlineSpecified()); FunctionDeclBits.addBit(D->isInlineSpecified());
FunctionDeclBits.addBit(D->isInlined()); FunctionDeclBits.addBit(D->isInlined());
@@ -1514,7 +1514,7 @@ void ASTDeclWriter::VisitCXXMethodDecl(CXXMethodDecl *D) {
D->getFirstDecl() == D->getMostRecentDecl() && !D->isInvalidDecl() && D->getFirstDecl() == D->getMostRecentDecl() && !D->isInvalidDecl() &&
!D->hasAttrs() && !D->isTopLevelDeclInObjCContainer() && !D->hasAttrs() && !D->isTopLevelDeclInObjCContainer() &&
D->getDeclName().getNameKind() == DeclarationName::Identifier && D->getDeclName().getNameKind() == DeclarationName::Identifier &&
!shouldSkipCheckingODR(D) && !D->hasExtInfo() && !D->shouldSkipCheckingODR() && !D->hasExtInfo() &&
!D->isExplicitlyDefaulted()) { !D->isExplicitlyDefaulted()) {
if (D->getTemplatedKind() == FunctionDecl::TK_NonTemplate || if (D->getTemplatedKind() == FunctionDecl::TK_NonTemplate ||
D->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate || D->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate ||
@@ -1409,7 +1409,7 @@ CallEventManager::getSimpleCall(const CallExpr *CE, ProgramStateRef State,
if (const auto *OpCE = dyn_cast<CXXOperatorCallExpr>(CE)) { if (const auto *OpCE = dyn_cast<CXXOperatorCallExpr>(CE)) {
const FunctionDecl *DirectCallee = OpCE->getDirectCallee(); const FunctionDecl *DirectCallee = OpCE->getDirectCallee();
if (const auto *MD = dyn_cast<CXXMethodDecl>(DirectCallee)) if (const auto *MD = dyn_cast<CXXMethodDecl>(DirectCallee))
if (MD->isInstance()) if (MD->isImplicitObjectMemberFunction())
return create<CXXMemberOperatorCall>(OpCE, State, LCtx, ElemRef); return create<CXXMemberOperatorCall>(OpCE, State, LCtx, ElemRef);
} else if (CE->getCallee()->getType()->isBlockPointerType()) { } else if (CE->getCallee()->getType()->isBlockPointerType()) {
@@ -13,7 +13,7 @@
#define QUAD_PRECISION #define QUAD_PRECISION
#include "fp_lib.h" #include "fp_lib.h"
#if defined(CRT_HAS_TF_MODE) #if defined(CRT_HAS_F128)
// Returns: the quotient of (a + ib) / (c + id) // Returns: the quotient of (a + ib) / (c + id)
@@ -22,6 +22,7 @@
#include "int_lib.h" #include "int_lib.h"
#include "int_math.h" #include "int_math.h"
#include "int_types.h"
#include <limits.h> #include <limits.h>
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h> #include <stdint.h>
@@ -93,13 +94,14 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b); COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b);
#elif defined QUAD_PRECISION #elif defined QUAD_PRECISION
#if defined(CRT_HAS_TF_MODE) #if defined(CRT_HAS_F128) && defined(CRT_HAS_128BIT)
typedef uint64_t half_rep_t; typedef uint64_t half_rep_t;
typedef __uint128_t rep_t; typedef __uint128_t rep_t;
typedef __int128_t srep_t; typedef __int128_t srep_t;
typedef tf_float fp_t; typedef tf_float fp_t;
#define HALF_REP_C UINT64_C #define HALF_REP_C UINT64_C
#define REP_C (__uint128_t) #define REP_C (__uint128_t)
#if defined(CRT_HAS_IEEE_TF)
// Note: Since there is no explicit way to tell compiler the constant is a // Note: Since there is no explicit way to tell compiler the constant is a
// 128-bit integer, we let the constant be casted to 128-bit integer // 128-bit integer, we let the constant be casted to 128-bit integer
#define significandBits 112 #define significandBits 112
@@ -188,7 +190,10 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
#undef Word_HiMask #undef Word_HiMask
#undef Word_LoMask #undef Word_LoMask
#undef Word_FullMask #undef Word_FullMask
#endif // defined(CRT_HAS_TF_MODE) #endif // defined(CRT_HAS_IEEE_TF)
#else
typedef long double fp_t;
#endif // defined(CRT_HAS_F128) && defined(CRT_HAS_128BIT)
#else #else
#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined. #error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined.
#endif #endif
@@ -196,19 +201,6 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
#if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) || \ #if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) || \
(defined(QUAD_PRECISION) && defined(CRT_HAS_TF_MODE)) (defined(QUAD_PRECISION) && defined(CRT_HAS_TF_MODE))
#define typeWidth (sizeof(rep_t) * CHAR_BIT) #define typeWidth (sizeof(rep_t) * CHAR_BIT)
#define exponentBits (typeWidth - significandBits - 1)
#define maxExponent ((1 << exponentBits) - 1)
#define exponentBias (maxExponent >> 1)
#define implicitBit (REP_C(1) << significandBits)
#define significandMask (implicitBit - 1U)
#define signBit (REP_C(1) << (significandBits + exponentBits))
#define absMask (signBit - 1U)
#define exponentMask (absMask ^ significandMask)
#define oneRep ((rep_t)exponentBias << significandBits)
#define infRep exponentMask
#define quietBit (implicitBit >> 1)
#define qnanRep (exponentMask | quietBit)
static __inline rep_t toRep(fp_t x) { static __inline rep_t toRep(fp_t x) {
const union { const union {
@@ -226,6 +218,21 @@ static __inline fp_t fromRep(rep_t x) {
return rep.f; return rep.f;
} }
#if !defined(QUAD_PRECISION) || defined(CRT_HAS_IEEE_TF)
#define exponentBits (typeWidth - significandBits - 1)
#define maxExponent ((1 << exponentBits) - 1)
#define exponentBias (maxExponent >> 1)
#define implicitBit (REP_C(1) << significandBits)
#define significandMask (implicitBit - 1U)
#define signBit (REP_C(1) << (significandBits + exponentBits))
#define absMask (signBit - 1U)
#define exponentMask (absMask ^ significandMask)
#define oneRep ((rep_t)exponentBias << significandBits)
#define infRep exponentMask
#define quietBit (implicitBit >> 1)
#define qnanRep (exponentMask | quietBit)
static __inline int normalize(rep_t *significand) { static __inline int normalize(rep_t *significand) {
const int shift = rep_clz(*significand) - rep_clz(implicitBit); const int shift = rep_clz(*significand) - rep_clz(implicitBit);
*significand <<= shift; *significand <<= shift;
@@ -328,6 +335,8 @@ static __inline fp_t __compiler_rt_scalbnX(fp_t x, int y) {
return fromRep(sign | ((rep_t)exp << significandBits) | sig); return fromRep(sign | ((rep_t)exp << significandBits) | sig);
} }
#endif // !defined(QUAD_PRECISION) || defined(CRT_HAS_IEEE_TF)
// Avoid using fmax from libm. // Avoid using fmax from libm.
static __inline fp_t __compiler_rt_fmaxX(fp_t x, fp_t y) { static __inline fp_t __compiler_rt_fmaxX(fp_t x, fp_t y) {
// If either argument is NaN, return the other argument. If both are NaN, // If either argument is NaN, return the other argument. If both are NaN,
@@ -405,6 +414,8 @@ static __inline tf_float __compiler_rt_fmaxtf(tf_float x, tf_float y) {
#define __compiler_rt_logbl crt_logbl #define __compiler_rt_logbl crt_logbl
#define __compiler_rt_scalbnl crt_scalbnl #define __compiler_rt_scalbnl crt_scalbnl
#define __compiler_rt_fmaxl crt_fmaxl #define __compiler_rt_fmaxl crt_fmaxl
#define crt_fabstf crt_fabsl
#define crt_copysigntf crt_copysignl
#else #else
#error Unsupported TF mode type #error Unsupported TF mode type
#endif #endif
@@ -189,12 +189,16 @@ typedef long double tf_float;
#define CRT_LDBL_IEEE_F128 #define CRT_LDBL_IEEE_F128
#endif #endif
#define TF_C(x) x##L #define TF_C(x) x##L
#elif __LDBL_MANT_DIG__ == 113 #elif __LDBL_MANT_DIG__ == 113 || \
// Use long double instead of __float128 if it matches the IEEE 128-bit format. (__FLT_RADIX__ == 16 && __LDBL_MANT_DIG__ == 28)
// Use long double instead of __float128 if it matches the IEEE 128-bit format
// or the IBM hexadecimal format.
#define CRT_LDBL_128BIT #define CRT_LDBL_128BIT
#define CRT_HAS_F128 #define CRT_HAS_F128
#if __LDBL_MANT_DIG__ == 113
#define CRT_HAS_IEEE_TF #define CRT_HAS_IEEE_TF
#define CRT_LDBL_IEEE_F128 #define CRT_LDBL_IEEE_F128
#endif
typedef long double tf_float; typedef long double tf_float;
#define TF_C(x) x##L #define TF_C(x) x##L
#elif defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__) #elif defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)
@@ -15,7 +15,7 @@
#include "int_lib.h" #include "int_lib.h"
#include "int_math.h" #include "int_math.h"
#if defined(CRT_HAS_TF_MODE) #if defined(CRT_HAS_F128)
// Returns: the product of a + ib and c + id // Returns: the product of a + ib and c + id
@@ -467,7 +467,7 @@ void __msan_init() {
__msan_clear_on_return(); __msan_clear_on_return();
if (__msan_get_track_origins()) if (__msan_get_track_origins())
VPrintf(1, "msan_track_origins\n"); VPrintf(1, "msan_track_origins\n");
if (!InitShadow(__msan_get_track_origins())) { if (!InitShadowWithReExec(__msan_get_track_origins())) {
Printf("FATAL: MemorySanitizer can not mmap the shadow memory.\n"); Printf("FATAL: MemorySanitizer can not mmap the shadow memory.\n");
Printf("FATAL: Make sure to compile with -fPIE and to link with -pie.\n"); Printf("FATAL: Make sure to compile with -fPIE and to link with -pie.\n");
Printf("FATAL: Disabling ASLR is known to cause this error.\n"); Printf("FATAL: Disabling ASLR is known to cause this error.\n");
@@ -33,12 +33,18 @@ struct MappingDesc {
uptr start; uptr start;
uptr end; uptr end;
enum Type { enum Type {
INVALID, APP, SHADOW, ORIGIN INVALID = 1,
ALLOCATOR = 2,
APP = 4,
SHADOW = 8,
ORIGIN = 16,
} type; } type;
const char *name; const char *name;
}; };
// Note: MappingDesc::ALLOCATOR entries are only used to check for memory
// layout compatibility. The actual allocation settings are in
// msan_allocator.cpp, which need to be kept in sync.
#if SANITIZER_LINUX && defined(__mips64) #if SANITIZER_LINUX && defined(__mips64)
// MIPS64 maps: // MIPS64 maps:
@@ -84,7 +90,8 @@ const MappingDesc kMemoryLayout[] = {
{0X0B00000000000, 0X0C00000000000, MappingDesc::SHADOW, "shadow-10-13"}, {0X0B00000000000, 0X0C00000000000, MappingDesc::SHADOW, "shadow-10-13"},
{0X0C00000000000, 0X0D00000000000, MappingDesc::INVALID, "invalid"}, {0X0C00000000000, 0X0D00000000000, MappingDesc::INVALID, "invalid"},
{0X0D00000000000, 0X0E00000000000, MappingDesc::ORIGIN, "origin-10-13"}, {0X0D00000000000, 0X0E00000000000, MappingDesc::ORIGIN, "origin-10-13"},
{0X0E00000000000, 0X1000000000000, MappingDesc::APP, "app-15"}, {0x0E00000000000, 0x0E40000000000, MappingDesc::ALLOCATOR, "allocator"},
{0X0E40000000000, 0X1000000000000, MappingDesc::APP, "app-15"},
}; };
# define MEM_TO_SHADOW(mem) ((uptr)mem ^ 0xB00000000000ULL) # define MEM_TO_SHADOW(mem) ((uptr)mem ^ 0xB00000000000ULL)
# define SHADOW_TO_ORIGIN(shadow) (((uptr)(shadow)) + 0x200000000000ULL) # define SHADOW_TO_ORIGIN(shadow) (((uptr)(shadow)) + 0x200000000000ULL)
@@ -106,7 +113,8 @@ const MappingDesc kMemoryLayout[] = {
{0x510000000000ULL, 0x600000000000ULL, MappingDesc::APP, "app-2"}, {0x510000000000ULL, 0x600000000000ULL, MappingDesc::APP, "app-2"},
{0x600000000000ULL, 0x610000000000ULL, MappingDesc::ORIGIN, "origin-1"}, {0x600000000000ULL, 0x610000000000ULL, MappingDesc::ORIGIN, "origin-1"},
{0x610000000000ULL, 0x700000000000ULL, MappingDesc::INVALID, "invalid"}, {0x610000000000ULL, 0x700000000000ULL, MappingDesc::INVALID, "invalid"},
{0x700000000000ULL, 0x800000000000ULL, MappingDesc::APP, "app-3"}}; {0x700000000000ULL, 0x740000000000ULL, MappingDesc::ALLOCATOR, "allocator"},
{0x740000000000ULL, 0x800000000000ULL, MappingDesc::APP, "app-3"}};
# define MEM_TO_SHADOW(mem) (((uptr)(mem)) ^ 0x500000000000ULL) # define MEM_TO_SHADOW(mem) (((uptr)(mem)) ^ 0x500000000000ULL)
# define SHADOW_TO_ORIGIN(shadow) (((uptr)(shadow)) + 0x100000000000ULL) # define SHADOW_TO_ORIGIN(shadow) (((uptr)(shadow)) + 0x100000000000ULL)
@@ -118,7 +126,8 @@ const MappingDesc kMemoryLayout[] = {
{0x180200000000ULL, 0x1C0000000000ULL, MappingDesc::INVALID, "invalid"}, {0x180200000000ULL, 0x1C0000000000ULL, MappingDesc::INVALID, "invalid"},
{0x1C0000000000ULL, 0x2C0200000000ULL, MappingDesc::ORIGIN, "origin"}, {0x1C0000000000ULL, 0x2C0200000000ULL, MappingDesc::ORIGIN, "origin"},
{0x2C0200000000ULL, 0x300000000000ULL, MappingDesc::INVALID, "invalid"}, {0x2C0200000000ULL, 0x300000000000ULL, MappingDesc::INVALID, "invalid"},
{0x300000000000ULL, 0x800000000000ULL, MappingDesc::APP, "high memory"}}; {0x300000000000ULL, 0x320000000000ULL, MappingDesc::ALLOCATOR, "allocator"},
{0x320000000000ULL, 0x800000000000ULL, MappingDesc::APP, "high memory"}};
// Various kernels use different low end ranges but we can combine them into one // Various kernels use different low end ranges but we can combine them into one
// big range. They also use different high end ranges but we can map them all to // big range. They also use different high end ranges but we can map them all to
@@ -141,7 +150,8 @@ const MappingDesc kMemoryLayout[] = {
{0x180000000000ULL, 0x1C0000000000ULL, MappingDesc::INVALID, "invalid"}, {0x180000000000ULL, 0x1C0000000000ULL, MappingDesc::INVALID, "invalid"},
{0x1C0000000000ULL, 0x2C0000000000ULL, MappingDesc::ORIGIN, "origin"}, {0x1C0000000000ULL, 0x2C0000000000ULL, MappingDesc::ORIGIN, "origin"},
{0x2C0000000000ULL, 0x440000000000ULL, MappingDesc::INVALID, "invalid"}, {0x2C0000000000ULL, 0x440000000000ULL, MappingDesc::INVALID, "invalid"},
{0x440000000000ULL, 0x500000000000ULL, MappingDesc::APP, "high memory"}}; {0x440000000000ULL, 0x460000000000ULL, MappingDesc::ALLOCATOR, "allocator"},
{0x460000000000ULL, 0x500000000000ULL, MappingDesc::APP, "high memory"}};
#define MEM_TO_SHADOW(mem) \ #define MEM_TO_SHADOW(mem) \
((((uptr)(mem)) & ~0xC00000000000ULL) + 0x080000000000ULL) ((((uptr)(mem)) & ~0xC00000000000ULL) + 0x080000000000ULL)
@@ -208,7 +218,8 @@ const MappingDesc kMemoryLayout[] = {
{0x510000000000ULL, 0x600000000000ULL, MappingDesc::APP, "app-2"}, {0x510000000000ULL, 0x600000000000ULL, MappingDesc::APP, "app-2"},
{0x600000000000ULL, 0x610000000000ULL, MappingDesc::ORIGIN, "origin-1"}, {0x600000000000ULL, 0x610000000000ULL, MappingDesc::ORIGIN, "origin-1"},
{0x610000000000ULL, 0x700000000000ULL, MappingDesc::INVALID, "invalid"}, {0x610000000000ULL, 0x700000000000ULL, MappingDesc::INVALID, "invalid"},
{0x700000000000ULL, 0x800000000000ULL, MappingDesc::APP, "app-3"}}; {0x700000000000ULL, 0x740000000000ULL, MappingDesc::ALLOCATOR, "allocator"},
{0x740000000000ULL, 0x800000000000ULL, MappingDesc::APP, "app-3"}};
#define MEM_TO_SHADOW(mem) (((uptr)(mem)) ^ 0x500000000000ULL) #define MEM_TO_SHADOW(mem) (((uptr)(mem)) ^ 0x500000000000ULL)
#define SHADOW_TO_ORIGIN(mem) (((uptr)(mem)) + 0x100000000000ULL) #define SHADOW_TO_ORIGIN(mem) (((uptr)(mem)) + 0x100000000000ULL)
@@ -223,20 +234,22 @@ const uptr kMemoryLayoutSize = sizeof(kMemoryLayout) / sizeof(kMemoryLayout[0]);
#ifndef __clang__ #ifndef __clang__
__attribute__((optimize("unroll-loops"))) __attribute__((optimize("unroll-loops")))
#endif #endif
inline bool addr_is_type(uptr addr, MappingDesc::Type mapping_type) { inline bool
addr_is_type(uptr addr, int mapping_types) {
// It is critical for performance that this loop is unrolled (because then it is // It is critical for performance that this loop is unrolled (because then it is
// simplified into just a few constant comparisons). // simplified into just a few constant comparisons).
#ifdef __clang__ #ifdef __clang__
#pragma unroll #pragma unroll
#endif #endif
for (unsigned i = 0; i < kMemoryLayoutSize; ++i) for (unsigned i = 0; i < kMemoryLayoutSize; ++i)
if (kMemoryLayout[i].type == mapping_type && if ((kMemoryLayout[i].type & mapping_types) &&
addr >= kMemoryLayout[i].start && addr < kMemoryLayout[i].end) addr >= kMemoryLayout[i].start && addr < kMemoryLayout[i].end)
return true; return true;
return false; return false;
} }
#define MEM_IS_APP(mem) addr_is_type((uptr)(mem), MappingDesc::APP) #define MEM_IS_APP(mem) \
(addr_is_type((uptr)(mem), MappingDesc::APP | MappingDesc::ALLOCATOR))
#define MEM_IS_SHADOW(mem) addr_is_type((uptr)(mem), MappingDesc::SHADOW) #define MEM_IS_SHADOW(mem) addr_is_type((uptr)(mem), MappingDesc::SHADOW)
#define MEM_IS_ORIGIN(mem) addr_is_type((uptr)(mem), MappingDesc::ORIGIN) #define MEM_IS_ORIGIN(mem) addr_is_type((uptr)(mem), MappingDesc::ORIGIN)
@@ -250,7 +263,7 @@ extern bool msan_init_is_running;
extern int msan_report_count; extern int msan_report_count;
bool ProtectRange(uptr beg, uptr end); bool ProtectRange(uptr beg, uptr end);
bool InitShadow(bool init_origins); bool InitShadowWithReExec(bool init_origins);
char *GetProcSelfMaps(); char *GetProcSelfMaps();
void InitializeInterceptors(); void InitializeInterceptors();
@@ -48,6 +48,9 @@ struct MsanMapUnmapCallback {
} }
}; };
// Note: to ensure that the allocator is compatible with the application memory
// layout (especially with high-entropy ASLR), kSpaceBeg and kSpaceSize must be
// duplicated as MappingDesc::ALLOCATOR in msan.h.
#if defined(__mips64) #if defined(__mips64)
static const uptr kMaxAllowedMallocSize = 2UL << 30; static const uptr kMaxAllowedMallocSize = 2UL << 30;
@@ -20,6 +20,9 @@
# include <signal.h> # include <signal.h>
# include <stdio.h> # include <stdio.h>
# include <stdlib.h> # include <stdlib.h>
# if SANITIZER_LINUX
# include <sys/personality.h>
# endif
# include <sys/resource.h> # include <sys/resource.h>
# include <sys/time.h> # include <sys/time.h>
# include <unistd.h> # include <unistd.h>
@@ -43,11 +46,13 @@ void ReportMapRange(const char *descr, uptr beg, uptr size) {
} }
} }
static bool CheckMemoryRangeAvailability(uptr beg, uptr size) { static bool CheckMemoryRangeAvailability(uptr beg, uptr size, bool verbose) {
if (size > 0) { if (size > 0) {
uptr end = beg + size - 1; uptr end = beg + size - 1;
if (!MemoryRangeIsAvailable(beg, end)) { if (!MemoryRangeIsAvailable(beg, end)) {
Printf("FATAL: Memory range 0x%zx - 0x%zx is not available.\n", beg, end); if (verbose)
Printf("FATAL: Memory range 0x%zx - 0x%zx is not available.\n", beg,
end);
return false; return false;
} }
} }
@@ -86,7 +91,7 @@ static void CheckMemoryLayoutSanity() {
CHECK(addr_is_type(start, type)); CHECK(addr_is_type(start, type));
CHECK(addr_is_type((start + end) / 2, type)); CHECK(addr_is_type((start + end) / 2, type));
CHECK(addr_is_type(end - 1, type)); CHECK(addr_is_type(end - 1, type));
if (type == MappingDesc::APP) { if (type == MappingDesc::APP || type == MappingDesc::ALLOCATOR) {
uptr addr = start; uptr addr = start;
CHECK(MEM_IS_SHADOW(MEM_TO_SHADOW(addr))); CHECK(MEM_IS_SHADOW(MEM_TO_SHADOW(addr)));
CHECK(MEM_IS_ORIGIN(MEM_TO_ORIGIN(addr))); CHECK(MEM_IS_ORIGIN(MEM_TO_ORIGIN(addr)));
@@ -106,7 +111,7 @@ static void CheckMemoryLayoutSanity() {
} }
} }
bool InitShadow(bool init_origins) { static bool InitShadow(bool init_origins, bool dry_run) {
// Let user know mapping parameters first. // Let user know mapping parameters first.
VPrintf(1, "__msan_init %p\n", reinterpret_cast<void *>(&__msan_init)); VPrintf(1, "__msan_init %p\n", reinterpret_cast<void *>(&__msan_init));
for (unsigned i = 0; i < kMemoryLayoutSize; ++i) for (unsigned i = 0; i < kMemoryLayoutSize; ++i)
@@ -116,6 +121,7 @@ bool InitShadow(bool init_origins) {
CheckMemoryLayoutSanity(); CheckMemoryLayoutSanity();
if (!MEM_IS_APP(&__msan_init)) { if (!MEM_IS_APP(&__msan_init)) {
if (!dry_run)
Printf("FATAL: Code %p is out of application range. Non-PIE build?\n", Printf("FATAL: Code %p is out of application range. Non-PIE build?\n",
reinterpret_cast<void *>(&__msan_init)); reinterpret_cast<void *>(&__msan_init));
return false; return false;
@@ -138,20 +144,26 @@ bool InitShadow(bool init_origins) {
bool protect = type == MappingDesc::INVALID || bool protect = type == MappingDesc::INVALID ||
(!init_origins && type == MappingDesc::ORIGIN); (!init_origins && type == MappingDesc::ORIGIN);
CHECK(!(map && protect)); CHECK(!(map && protect));
if (!map && !protect) if (!map && !protect) {
CHECK(type == MappingDesc::APP); CHECK(type == MappingDesc::APP || type == MappingDesc::ALLOCATOR);
if (dry_run && type == MappingDesc::ALLOCATOR &&
!CheckMemoryRangeAvailability(start, size, !dry_run))
return false;
}
if (map) { if (map) {
if (!CheckMemoryRangeAvailability(start, size)) if (dry_run && !CheckMemoryRangeAvailability(start, size, !dry_run))
return false; return false;
if (!MmapFixedSuperNoReserve(start, size, kMemoryLayout[i].name)) if (!dry_run &&
!MmapFixedSuperNoReserve(start, size, kMemoryLayout[i].name))
return false; return false;
if (common_flags()->use_madv_dontdump) if (!dry_run && common_flags()->use_madv_dontdump)
DontDumpShadowMemory(start, size); DontDumpShadowMemory(start, size);
} }
if (protect) { if (protect) {
if (!CheckMemoryRangeAvailability(start, size)) if (dry_run && !CheckMemoryRangeAvailability(start, size, !dry_run))
return false; return false;
if (!ProtectMemoryRange(start, size, kMemoryLayout[i].name)) if (!dry_run && !ProtectMemoryRange(start, size, kMemoryLayout[i].name))
return false; return false;
} }
} }
@@ -159,6 +171,35 @@ bool InitShadow(bool init_origins) {
return true; return true;
} }
// Sets up the MSan shadow (and, when requested, origin) mappings, re-executing
// the process with ASLR disabled when the current address-space layout is
// incompatible with kMemoryLayout.
//
// init_origins: forwarded unchanged to InitShadow() for both the dry run and
//               the real run, so both passes see the same setting.
//
// Returns true only if the dry run and the subsequent real run both succeed.
bool InitShadowWithReExec(bool init_origins) {
  // Start with dry run: check layout is ok, but don't print warnings because
  // warning messages will cause tests to fail (even if we successfully re-exec
  // after the warning).
  const bool layout_ok = InitShadow(init_origins, /*dry_run=*/true);
  if (!layout_ok) {
#  if SANITIZER_LINUX
    // Perhaps ASLR entropy is too high. If ASLR is enabled, re-exec without it.
    // personality(0xffffffff) queries the current persona without changing it.
    const int old_personality = personality(0xffffffff);
    const bool aslr_on =
        (old_personality != -1) && ((old_personality & ADDR_NO_RANDOMIZE) == 0);

    if (aslr_on) {
      VReport(1,
              "WARNING: MemorySanitizer: memory layout is incompatible, "
              "possibly due to high-entropy ASLR.\n"
              "Re-execing with fixed virtual address space.\n"
              "N.B. reducing ASLR entropy is preferable.\n");
      CHECK_NE(personality(old_personality | ADDR_NO_RANDOMIZE), -1);
      // NOTE(review): ReExec() is presumably noreturn on success; if it ever
      // returns we fall through and report failure below.
      ReExec();
    }
#  endif
    // ASLR was already off (or this is not Linux): nothing left to try.
    return false;
  }

  // The earlier dry run didn't actually map or protect anything. Run again in
  // non-dry run mode.
  return InitShadow(init_origins, /*dry_run=*/false);
}
static void MsanAtExit(void) { static void MsanAtExit(void) {
if (flags()->print_stats && (flags()->atexit || msan_report_count > 0)) if (flags()->print_stats && (flags()->atexit || msan_report_count > 0))
ReportStats(); ReportStats();
@@ -14,6 +14,7 @@
#include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_errno.h" #include "sanitizer_common/sanitizer_errno.h"
#include "sanitizer_common/sanitizer_glibc_version.h"
#include "sanitizer_common/sanitizer_libc.h" #include "sanitizer_common/sanitizer_libc.h"
#include "sanitizer_common/sanitizer_linux.h" #include "sanitizer_common/sanitizer_linux.h"
#include "sanitizer_common/sanitizer_platform_limits_netbsd.h" #include "sanitizer_common/sanitizer_platform_limits_netbsd.h"
@@ -1613,47 +1614,40 @@ TSAN_INTERCEPTOR(int, __fxstat, int version, int fd, void *buf) {
FdAccess(thr, pc, fd); FdAccess(thr, pc, fd);
return REAL(__fxstat)(version, fd, buf); return REAL(__fxstat)(version, fd, buf);
} }
#define TSAN_MAYBE_INTERCEPT___FXSTAT TSAN_INTERCEPT(__fxstat)
#else
#define TSAN_MAYBE_INTERCEPT___FXSTAT
#endif
TSAN_INTERCEPTOR(int, fstat, int fd, void *buf) {
#if SANITIZER_GLIBC
SCOPED_TSAN_INTERCEPTOR(__fxstat, 0, fd, buf);
if (fd > 0)
FdAccess(thr, pc, fd);
return REAL(__fxstat)(0, fd, buf);
#else
SCOPED_TSAN_INTERCEPTOR(fstat, fd, buf);
if (fd > 0)
FdAccess(thr, pc, fd);
return REAL(fstat)(fd, buf);
#endif
}
#if SANITIZER_GLIBC
TSAN_INTERCEPTOR(int, __fxstat64, int version, int fd, void *buf) { TSAN_INTERCEPTOR(int, __fxstat64, int version, int fd, void *buf) {
SCOPED_TSAN_INTERCEPTOR(__fxstat64, version, fd, buf); SCOPED_TSAN_INTERCEPTOR(__fxstat64, version, fd, buf);
if (fd > 0) if (fd > 0)
FdAccess(thr, pc, fd); FdAccess(thr, pc, fd);
return REAL(__fxstat64)(version, fd, buf); return REAL(__fxstat64)(version, fd, buf);
} }
#define TSAN_MAYBE_INTERCEPT___FXSTAT64 TSAN_INTERCEPT(__fxstat64) #define TSAN_MAYBE_INTERCEPT___FXSTAT TSAN_INTERCEPT(__fxstat); TSAN_INTERCEPT(__fxstat64)
#else #else
#define TSAN_MAYBE_INTERCEPT___FXSTAT64 #define TSAN_MAYBE_INTERCEPT___FXSTAT
#endif #endif
#if SANITIZER_GLIBC #if !SANITIZER_GLIBC || __GLIBC_PREREQ(2, 33)
TSAN_INTERCEPTOR(int, fstat64, int fd, void *buf) { TSAN_INTERCEPTOR(int, fstat, int fd, void *buf) {
SCOPED_TSAN_INTERCEPTOR(__fxstat64, 0, fd, buf); SCOPED_TSAN_INTERCEPTOR(fstat, fd, buf);
if (fd > 0) if (fd > 0)
FdAccess(thr, pc, fd); FdAccess(thr, pc, fd);
return REAL(__fxstat64)(0, fd, buf); return REAL(fstat)(fd, buf);
} }
#define TSAN_MAYBE_INTERCEPT_FSTAT64 TSAN_INTERCEPT(fstat64) # define TSAN_MAYBE_INTERCEPT_FSTAT TSAN_INTERCEPT(fstat)
#else #else
#define TSAN_MAYBE_INTERCEPT_FSTAT64 # define TSAN_MAYBE_INTERCEPT_FSTAT
#endif
#if __GLIBC_PREREQ(2, 33)
TSAN_INTERCEPTOR(int, fstat64, int fd, void *buf) {
SCOPED_TSAN_INTERCEPTOR(fstat64, fd, buf);
if (fd > 0)
FdAccess(thr, pc, fd);
return REAL(fstat64)(fd, buf);
}
# define TSAN_MAYBE_INTERCEPT_FSTAT64 TSAN_INTERCEPT(fstat64)
#else
# define TSAN_MAYBE_INTERCEPT_FSTAT64
#endif #endif
TSAN_INTERCEPTOR(int, open, const char *name, int oflag, ...) { TSAN_INTERCEPTOR(int, open, const char *name, int oflag, ...) {
@@ -2950,10 +2944,9 @@ void InitializeInterceptors() {
TSAN_INTERCEPT(pthread_once); TSAN_INTERCEPT(pthread_once);
TSAN_INTERCEPT(fstat);
TSAN_MAYBE_INTERCEPT___FXSTAT; TSAN_MAYBE_INTERCEPT___FXSTAT;
TSAN_MAYBE_INTERCEPT_FSTAT;
TSAN_MAYBE_INTERCEPT_FSTAT64; TSAN_MAYBE_INTERCEPT_FSTAT64;
TSAN_MAYBE_INTERCEPT___FXSTAT64;
TSAN_INTERCEPT(open); TSAN_INTERCEPT(open);
TSAN_MAYBE_INTERCEPT_OPEN64; TSAN_MAYBE_INTERCEPT_OPEN64;
TSAN_INTERCEPT(creat); TSAN_INTERCEPT(creat);
@@ -72,11 +72,10 @@
# endif # endif
#endif #endif
// Availability markup is disabled when building the library, or when the compiler // Availability markup is disabled when building the library, or when a non-Clang
// compiler is used because only Clang supports the necessary attributes.
// doesn't support the proper attributes. // doesn't support the proper attributes.
#if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || \ #if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || !defined(_LIBCPP_COMPILER_CLANG_BASED)
!__has_feature(attribute_availability_with_strict) || !__has_feature(attribute_availability_in_templates) || \
!__has_extension(pragma_clang_attribute_external_declaration)
# if !defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) # if !defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS)
# define _LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS # define _LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS
# endif # endif
@@ -5,7 +5,7 @@
{ {
"logical-name": "std", "logical-name": "std",
"source-path": "@LIBCXX_MODULE_RELATIVE_PATH@/std.cppm", "source-path": "@LIBCXX_MODULE_RELATIVE_PATH@/std.cppm",
"is-standard-library": true, "is-std-library": true,
"local-arguments": { "local-arguments": {
"system-include-directories": [ "system-include-directories": [
"@LIBCXX_MODULE_RELATIVE_PATH@" "@LIBCXX_MODULE_RELATIVE_PATH@"
+1 -1
View File
@@ -172,7 +172,7 @@ binImports(COFFLinkerContext &ctx,
// A chunk for the delay import descriptor table entry. // A chunk for the delay import descriptor table entry.
class DelayDirectoryChunk : public NonSectionChunk { class DelayDirectoryChunk : public NonSectionChunk {
public: public:
explicit DelayDirectoryChunk(Chunk *n) : dllName(n) {} explicit DelayDirectoryChunk(Chunk *n) : dllName(n) { setAlignment(4); }
size_t getSize() const override { size_t getSize() const override {
return sizeof(delay_import_directory_table_entry); return sizeof(delay_import_directory_table_entry);
@@ -11,6 +11,7 @@
#include "Symbols.h" #include "Symbols.h"
#include "SyntheticSections.h" #include "SyntheticSections.h"
#include "Target.h" #include "Target.h"
#include "llvm/Support/LEB128.h"
using namespace llvm; using namespace llvm;
using namespace llvm::object; using namespace llvm::object;
@@ -41,6 +42,7 @@ class LoongArch final : public TargetInfo {
}; };
} // end anonymous namespace } // end anonymous namespace
namespace {
enum Op { enum Op {
SUB_W = 0x00110000, SUB_W = 0x00110000,
SUB_D = 0x00118000, SUB_D = 0x00118000,
@@ -65,6 +67,7 @@ enum Reg {
R_T2 = 14, R_T2 = 14,
R_T3 = 15, R_T3 = 15,
}; };
} // namespace
// Mask out the input's lowest 12 bits for use with `pcalau12i`, in sequences // Mask out the input's lowest 12 bits for use with `pcalau12i`, in sequences
// like `pcalau12i + addi.[wd]` or `pcalau12i + {ld,st}.*` where the `pcalau12i` // like `pcalau12i + addi.[wd]` or `pcalau12i + {ld,st}.*` where the `pcalau12i`
@@ -153,6 +156,17 @@ static bool isJirl(uint32_t insn) {
return (insn & 0xfc000000) == JIRL; return (insn & 0xfc000000) == JIRL;
} }
// Adds `val` to the ULEB128-encoded integer stored at `loc` and writes the sum
// back using the same number of bytes that the existing encoding occupies.
// Used for R_LARCH_ADD_ULEB128, and for R_LARCH_SUB_ULEB128 by passing the
// negated value.
static void handleUleb128(uint8_t *loc, uint64_t val) {
  // A 64-bit quantity needs at most 1 + 64 / 7 = 10 ULEB128 bytes.
  constexpr uint32_t kMaxBytes = 1 + 64 / 7;

  uint32_t numBytes;
  const char *decodeErr = nullptr;
  const uint64_t current = decodeULEB128(loc, &numBytes, nullptr, &decodeErr);

  // Reject encodings longer than a 64-bit value can need, and maximal-length
  // ones that the decoder itself flagged as erroneous.
  const bool tooLong = numBytes > kMaxBytes;
  const bool badAtLimit = numBytes == kMaxBytes && decodeErr;
  if (tooLong || badAtLimit)
    errorOrWarn(getErrorLocation(loc) + "extra space for uleb128");

  // Truncate the sum to the 7 * numBytes payload bits the slot can hold; a
  // maximal-length slot keeps all 64 bits.
  uint64_t keep = -1ULL;
  if (numBytes < kMaxBytes)
    keep = (1ULL << 7 * numBytes) - 1;

  encodeULEB128((current + val) & keep, loc, numBytes);
}
LoongArch::LoongArch() { LoongArch::LoongArch() {
// The LoongArch ISA itself does not have a limit on page sizes. According to // The LoongArch ISA itself does not have a limit on page sizes. According to
// the ISA manual, the PS (page size) field in MTLB entries and CSR.STLBPS is // the ISA manual, the PS (page size) field in MTLB entries and CSR.STLBPS is
@@ -394,11 +408,13 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s,
case R_LARCH_ADD16: case R_LARCH_ADD16:
case R_LARCH_ADD32: case R_LARCH_ADD32:
case R_LARCH_ADD64: case R_LARCH_ADD64:
case R_LARCH_ADD_ULEB128:
case R_LARCH_SUB6: case R_LARCH_SUB6:
case R_LARCH_SUB8: case R_LARCH_SUB8:
case R_LARCH_SUB16: case R_LARCH_SUB16:
case R_LARCH_SUB32: case R_LARCH_SUB32:
case R_LARCH_SUB64: case R_LARCH_SUB64:
case R_LARCH_SUB_ULEB128:
// The LoongArch add/sub relocs behave like the RISCV counterparts; reuse // The LoongArch add/sub relocs behave like the RISCV counterparts; reuse
// the RelExpr to avoid code duplication. // the RelExpr to avoid code duplication.
return R_RISCV_ADD; return R_RISCV_ADD;
@@ -633,6 +649,9 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
case R_LARCH_ADD64: case R_LARCH_ADD64:
write64le(loc, read64le(loc) + val); write64le(loc, read64le(loc) + val);
return; return;
case R_LARCH_ADD_ULEB128:
handleUleb128(loc, val);
return;
case R_LARCH_SUB6: case R_LARCH_SUB6:
*loc = (*loc & 0xc0) | ((*loc - val) & 0x3f); *loc = (*loc & 0xc0) | ((*loc - val) & 0x3f);
return; return;
@@ -648,6 +667,9 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
case R_LARCH_SUB64: case R_LARCH_SUB64:
write64le(loc, read64le(loc) - val); write64le(loc, read64le(loc) - val);
return; return;
case R_LARCH_SUB_ULEB128:
handleUleb128(loc, -val);
return;
case R_LARCH_MARK_LA: case R_LARCH_MARK_LA:
case R_LARCH_MARK_PCREL: case R_LARCH_MARK_PCREL:
+2 -1
View File
@@ -26,6 +26,7 @@ using namespace lld::elf;
constexpr uint64_t ppc64TocOffset = 0x8000; constexpr uint64_t ppc64TocOffset = 0x8000;
constexpr uint64_t dynamicThreadPointerOffset = 0x8000; constexpr uint64_t dynamicThreadPointerOffset = 0x8000;
namespace {
// The instruction encoding of bits 21-30 from the ISA for the Xform and Dform // The instruction encoding of bits 21-30 from the ISA for the Xform and Dform
// instructions that can be used as part of the initial exec TLS sequence. // instructions that can be used as part of the initial exec TLS sequence.
enum XFormOpcd { enum XFormOpcd {
@@ -139,6 +140,7 @@ enum class PPCPrefixedInsn : uint64_t {
PSTXV = PREFIX_8LS | 0xd8000000, PSTXV = PREFIX_8LS | 0xd8000000,
PSTXVP = PREFIX_8LS | 0xf8000000 PSTXVP = PREFIX_8LS | 0xf8000000
}; };
static bool checkPPCLegacyInsn(uint32_t encoding) { static bool checkPPCLegacyInsn(uint32_t encoding) {
PPCLegacyInsn insn = static_cast<PPCLegacyInsn>(encoding); PPCLegacyInsn insn = static_cast<PPCLegacyInsn>(encoding);
if (insn == PPCLegacyInsn::NOINSN) if (insn == PPCLegacyInsn::NOINSN)
@@ -164,7 +166,6 @@ enum class LegacyToPrefixMask : uint64_t {
0x8000000003e00000, // S/T (6-10) - The [S/T]X bit moves from 28 to 5. 0x8000000003e00000, // S/T (6-10) - The [S/T]X bit moves from 28 to 5.
}; };
namespace {
class PPC64 final : public TargetInfo { class PPC64 final : public TargetInfo {
public: public:
PPC64(); PPC64();
@@ -57,6 +57,7 @@ class RISCV final : public TargetInfo {
const uint64_t dtpOffset = 0x800; const uint64_t dtpOffset = 0x800;
namespace {
enum Op { enum Op {
ADDI = 0x13, ADDI = 0x13,
AUIPC = 0x17, AUIPC = 0x17,
@@ -78,6 +79,7 @@ enum Reg {
X_A0 = 10, X_A0 = 10,
X_T3 = 28, X_T3 = 28,
}; };
} // namespace
static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; } static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; }
static uint32_t lo12(uint32_t val) { return val & 4095; } static uint32_t lo12(uint32_t val) { return val & 4095; }
+4 -3
View File
@@ -328,9 +328,10 @@ bool X86_64::relaxOnce(int pass) const {
if (rel.expr != R_RELAX_GOT_PC) if (rel.expr != R_RELAX_GOT_PC)
continue; continue;
uint64_t v = sec->getRelocTargetVA( uint64_t v = sec->getRelocTargetVA(sec->file, rel.type, rel.addend,
sec->file, rel.type, rel.addend, sec->getOutputSection()->addr +
sec->getOutputSection()->addr + rel.offset, *rel.sym, rel.expr); sec->outSecOff + rel.offset,
*rel.sym, rel.expr);
if (isInt<32>(v)) if (isInt<32>(v))
continue; continue;
if (rel.sym->auxIdx == 0) { if (rel.sym->auxIdx == 0) {
+3
View File
@@ -261,6 +261,9 @@ static void demoteDefined(Defined &sym, DenseMap<SectionBase *, size_t> &map) {
Undefined(sym.file, sym.getName(), binding, sym.stOther, sym.type, Undefined(sym.file, sym.getName(), binding, sym.stOther, sym.type,
/*discardedSecIdx=*/map.lookup(sym.section)) /*discardedSecIdx=*/map.lookup(sym.section))
.overwrite(sym); .overwrite(sym);
// Eliminate from the symbol table, otherwise we would leave an undefined
// symbol if the symbol is unreferenced in the absence of GC.
sym.isUsedInRegularObj = false;
} }
// If all references to a DSO happen to be weak, the DSO is not added to // If all references to a DSO happen to be weak, the DSO is not added to
@@ -163,5 +163,10 @@ WebAssembly Improvements
is read from object files within the archive. This matches the behaviour of is read from object files within the archive. This matches the behaviour of
the ELF linker. the ELF linker.
SystemZ
-------
* Add target support for SystemZ (s390x).
Fixes Fixes
##### #####
@@ -406,6 +406,11 @@ bool maskIsAllZeroOrUndef(Value *Mask);
/// lanes can be assumed active. /// lanes can be assumed active.
bool maskIsAllOneOrUndef(Value *Mask); bool maskIsAllOneOrUndef(Value *Mask);
/// Given a mask vector of i1, Return true if any of the elements of this
/// predicate mask are known to be true or undef. That is, return true if at
/// least one lane can be assumed active.
bool maskContainsAllOneOrUndef(Value *Mask);
/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) /// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
/// for each lane which may be active. /// for each lane which may be active.
APInt possiblyDemandedEltsInMask(Value *Mask); APInt possiblyDemandedEltsInMask(Value *Mask);
@@ -716,7 +716,10 @@ enum ImportNameType : unsigned {
IMPORT_NAME_NOPREFIX = 2, IMPORT_NAME_NOPREFIX = 2,
/// The import name is the public symbol name, but skipping the leading ?, /// The import name is the public symbol name, but skipping the leading ?,
/// @, or optionally _, and truncating at the first @. /// @, or optionally _, and truncating at the first @.
IMPORT_NAME_UNDECORATE = 3 IMPORT_NAME_UNDECORATE = 3,
/// The import name is specified as a separate string in the import library
/// object file.
IMPORT_NAME_EXPORTAS = 4
}; };
enum class GuardFlags : uint32_t { enum class GuardFlags : uint32_t {
@@ -1362,6 +1362,47 @@ class SectionStrippedError
SectionStrippedError() { setErrorCode(object_error::section_stripped); } SectionStrippedError() { setErrorCode(object_error::section_stripped); }
}; };
/// Produce the Arm64EC-mangled spelling of \p Name.
///
/// Names not starting with '?' get a '#' prefix; names starting with '?' get
/// "$$h" inserted after the first "@@" that is not the start of an "@@@" run,
/// falling back to just after the first '@'. Returns std::nullopt when the
/// name already carries the marker ('#' prefix, or "$$h" in a '?' name).
/// \p Name must be non-empty.
inline std::optional<std::string>
getArm64ECMangledFunctionName(StringRef Name) {
  const char First = Name[0];

  // A leading '#' means the name is already mangled.
  if (First == '#')
    return std::nullopt;

  // Names that do not start with '?' are simply prefixed with '#'.
  if (First != '?')
    return std::optional<std::string>(("#" + Name).str());

  // A '?' name already containing the "$$h" tag is left alone.
  if (Name.find("$$h") != std::string::npos)
    return std::nullopt;

  // Choose where the "$$h" tag goes.
  size_t TagPos = Name.find("@@");
  if (TagPos != std::string::npos && TagPos != Name.find("@@@")) {
    TagPos += 2;
  } else {
    TagPos = Name.find("@");
    if (TagPos != std::string::npos)
      ++TagPos;
  }

  return std::optional<std::string>(
      (Name.substr(0, TagPos) + "$$h" + Name.substr(TagPos)).str());
}
/// Reverse of the Arm64EC function-name mangling for \p Name.
///
/// A leading '#' is stripped. For names starting with '?', the first "$$h" is
/// removed. Returns std::nullopt for any other first character, or when
/// nothing follows the "$$h" tag (including when the tag is absent).
/// \p Name must be non-empty.
inline std::optional<std::string>
getArm64ECDemangledFunctionName(StringRef Name) {
  switch (Name[0]) {
  case '#':
    return std::string(Name.substr(1));
  case '?':
    break;
  default:
    return std::nullopt;
  }

  // Split around the first "$$h"; an empty tail means there is nothing to
  // rejoin.
  auto [Head, Tail] = Name.split("$$h");
  if (Tail.empty())
    return std::nullopt;
  return (Head + Tail).str();
}
} // end namespace object } // end namespace object
} // end namespace llvm } // end namespace llvm
@@ -26,7 +26,16 @@
namespace llvm { namespace llvm {
namespace object { namespace object {
// Fragments of the special symbol names used by import libraries. The prefix
// and suffix constants are combined with a library name to form the full
// symbol; the null import descriptor name is used as-is.
constexpr std::string_view ImportDescriptorPrefix = "__IMPORT_DESCRIPTOR_";
constexpr std::string_view NullImportDescriptorSymbolName =
"__NULL_IMPORT_DESCRIPTOR";
// Null thunk symbols have the form <prefix><library><suffix>.
constexpr std::string_view NullThunkDataPrefix = "\x7f";
constexpr std::string_view NullThunkDataSuffix = "_NULL_THUNK_DATA";
class COFFImportFile : public SymbolicFile { class COFFImportFile : public SymbolicFile {
private:
enum SymbolIndex { ImpSymbol, ThunkSymbol, ECAuxSymbol, ECThunkSymbol };
public: public:
COFFImportFile(MemoryBufferRef Source) COFFImportFile(MemoryBufferRef Source)
: SymbolicFile(ID_COFFImportFile, Source) {} : SymbolicFile(ID_COFFImportFile, Source) {}
@@ -36,9 +45,23 @@ class COFFImportFile : public SymbolicFile {
void moveSymbolNext(DataRefImpl &Symb) const override { ++Symb.p; } void moveSymbolNext(DataRefImpl &Symb) const override { ++Symb.p; }
Error printSymbolName(raw_ostream &OS, DataRefImpl Symb) const override { Error printSymbolName(raw_ostream &OS, DataRefImpl Symb) const override {
if (Symb.p == 0) switch (Symb.p) {
case ImpSymbol:
OS << "__imp_"; OS << "__imp_";
OS << StringRef(Data.getBufferStart() + sizeof(coff_import_header)); break;
case ECAuxSymbol:
OS << "__imp_aux_";
break;
}
const char *Name = Data.getBufferStart() + sizeof(coff_import_header);
if (Symb.p != ECThunkSymbol && COFF::isArm64EC(getMachine())) {
if (std::optional<std::string> DemangledName =
getArm64ECDemangledFunctionName(Name)) {
OS << StringRef(*DemangledName);
return Error::success();
}
}
OS << StringRef(Name);
return Error::success(); return Error::success();
} }
@@ -52,7 +75,12 @@ class COFFImportFile : public SymbolicFile {
basic_symbol_iterator symbol_end() const override { basic_symbol_iterator symbol_end() const override {
DataRefImpl Symb; DataRefImpl Symb;
Symb.p = isData() ? 1 : 2; if (isData())
Symb.p = ImpSymbol + 1;
else if (COFF::isArm64EC(getMachine()))
Symb.p = ECThunkSymbol + 1;
else
Symb.p = ThunkSymbol + 1;
return BasicSymbolRef(Symb, this); return BasicSymbolRef(Symb, this);
} }
@@ -66,6 +94,7 @@ class COFFImportFile : public SymbolicFile {
uint16_t getMachine() const { return getCOFFImportHeader()->Machine; } uint16_t getMachine() const { return getCOFFImportHeader()->Machine; }
StringRef getFileFormatName() const; StringRef getFileFormatName() const;
StringRef getExportName() const;
private: private:
bool isData() const { bool isData() const {
@@ -52,6 +52,10 @@ class formatted_raw_ostream : public raw_ostream {
/// have the rest of it. /// have the rest of it.
SmallString<4> PartialUTF8Char; SmallString<4> PartialUTF8Char;
/// DisableScan - Temporarily disable scanning of output. Used to ignore color
/// codes.
bool DisableScan;
void write_impl(const char *Ptr, size_t Size) override; void write_impl(const char *Ptr, size_t Size) override;
/// current_pos - Return the current position within the stream, /// current_pos - Return the current position within the stream,
@@ -89,9 +93,33 @@ class formatted_raw_ostream : public raw_ostream {
SetUnbuffered(); SetUnbuffered();
TheStream->SetUnbuffered(); TheStream->SetUnbuffered();
enable_colors(TheStream->colors_enabled());
Scanned = nullptr; Scanned = nullptr;
} }
/// Bring the position up to date for everything currently buffered, then
/// switch scanning off. Must not be called while scanning is already off.
void PreDisableScan() {
assert(!DisableScan);
// Scan the bytes already in the buffer before scanning is turned off.
ComputePosition(getBufferStart(), GetNumBytesInBuffer());
// No partially received UTF-8 character may be pending at this point.
assert(PartialUTF8Char.empty());
DisableScan = true;
}
/// Switch scanning back on. Must only be called while scanning is off.
void PostDisableScan() {
assert(DisableScan);
DisableScan = false;
// Mark everything currently in the buffer as already scanned, so bytes
// written while scanning was off are not counted later.
Scanned = getBufferStart() + GetNumBytesInBuffer();
}
/// Scope guard: calls PreDisableScan() on construction and PostDisableScan()
/// on destruction, so scanning is off for the lifetime of the object.
struct DisableScanScope {
// Stream whose scanning is suspended; not owned.
formatted_raw_ostream *S;
DisableScanScope(formatted_raw_ostream *FRO) : S(FRO) {
S->PreDisableScan();
}
~DisableScanScope() { S->PostDisableScan(); }
};
public: public:
/// formatted_raw_ostream - Open the specified file for /// formatted_raw_ostream - Open the specified file for
/// writing. If an error occurs, information about the error is /// writing. If an error occurs, information about the error is
@@ -104,12 +132,12 @@ class formatted_raw_ostream : public raw_ostream {
/// underneath it. /// underneath it.
/// ///
formatted_raw_ostream(raw_ostream &Stream) formatted_raw_ostream(raw_ostream &Stream)
: TheStream(nullptr), Position(0, 0) { : TheStream(nullptr), Position(0, 0), DisableScan(false) {
setStream(Stream); setStream(Stream);
} }
explicit formatted_raw_ostream() : TheStream(nullptr), Position(0, 0) { explicit formatted_raw_ostream()
Scanned = nullptr; : TheStream(nullptr), Position(0, 0), Scanned(nullptr),
} DisableScan(false) {}
~formatted_raw_ostream() override { ~formatted_raw_ostream() override {
flush(); flush();
@@ -136,17 +164,26 @@ class formatted_raw_ostream : public raw_ostream {
} }
raw_ostream &resetColor() override { raw_ostream &resetColor() override {
TheStream->resetColor(); if (colors_enabled()) {
DisableScanScope S(this);
raw_ostream::resetColor();
}
return *this; return *this;
} }
raw_ostream &reverseColor() override { raw_ostream &reverseColor() override {
TheStream->reverseColor(); if (colors_enabled()) {
DisableScanScope S(this);
raw_ostream::reverseColor();
}
return *this; return *this;
} }
raw_ostream &changeColor(enum Colors Color, bool Bold, bool BG) override { raw_ostream &changeColor(enum Colors Color, bool Bold, bool BG) override {
TheStream->changeColor(Color, Bold, BG); if (colors_enabled()) {
DisableScanScope S(this);
raw_ostream::changeColor(Color, Bold, BG);
}
return *this; return *this;
} }
@@ -620,7 +620,7 @@ class SecondFusionPredicateWithMCInstPredicate<MCInstPredicate pred>
: FusionPredicateWithMCInstPredicate<second_fusion_target, pred>; : FusionPredicateWithMCInstPredicate<second_fusion_target, pred>;
// The pred will be applied on both firstMI and secondMI. // The pred will be applied on both firstMI and secondMI.
class BothFusionPredicateWithMCInstPredicate<MCInstPredicate pred> class BothFusionPredicateWithMCInstPredicate<MCInstPredicate pred>
: FusionPredicateWithMCInstPredicate<second_fusion_target, pred>; : FusionPredicateWithMCInstPredicate<both_fusion_target, pred>;
// Tie firstOpIdx and secondOpIdx. The operand of `FirstMI` at position // Tie firstOpIdx and secondOpIdx. The operand of `FirstMI` at position
// `firstOpIdx` should be the same as the operand of `SecondMI` at position // `firstOpIdx` should be the same as the operand of `SecondMI` at position
@@ -5986,6 +5986,8 @@ void llvm::getUnderlyingObjects(const Value *V,
if (!LI || !LI->isLoopHeader(PN->getParent()) || if (!LI || !LI->isLoopHeader(PN->getParent()) ||
isSameUnderlyingObjectInLoop(PN, LI)) isSameUnderlyingObjectInLoop(PN, LI))
append_range(Worklist, PN->incoming_values()); append_range(Worklist, PN->incoming_values());
else
Objects.push_back(P);
continue; continue;
} }
@@ -1012,6 +1012,31 @@ bool llvm::maskIsAllOneOrUndef(Value *Mask) {
return true; return true;
} }
/// Return true if \p Mask is a constant i1 vector in which at least one lane
/// is all-ones or undef. Non-constant masks return false; so do scalable
/// vector constants, unless the whole constant is all-ones or undef.
bool llvm::maskContainsAllOneOrUndef(Value *Mask) {
  assert(isa<VectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
             1 &&
         "Mask must be a vector of i1");

  // Shared test for the whole mask and for individual lanes.
  auto IsOneOrUndef = [](const Constant *C) {
    return C->isAllOnesValue() || isa<UndefValue>(C);
  };

  auto *ConstMask = dyn_cast<Constant>(Mask);
  if (!ConstMask)
    return false;
  if (IsOneOrUndef(ConstMask))
    return true;

  // Lanes of a scalable vector cannot be enumerated.
  if (isa<ScalableVectorType>(ConstMask->getType()))
    return false;

  const unsigned NumElts =
      cast<FixedVectorType>(ConstMask->getType())->getNumElements();
  for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
    Constant *Elt = ConstMask->getAggregateElement(Idx);
    if (Elt && IsOneOrUndef(Elt))
      return true;
  }
  return false;
}
/// TODO: This is a lot like known bits, but for /// TODO: This is a lot like known bits, but for
/// vectors. Is there something we can common this with? /// vectors. Is there something we can common this with?
APInt llvm::possiblyDemandedEltsInMask(Value *Mask) { APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
@@ -44,6 +44,7 @@
#include "llvm/MC/MCSection.h" #include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolMachO.h"
#include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h" #include "llvm/MC/MCValue.h"
#include "llvm/Support/Casting.h" #include "llvm/Support/Casting.h"
@@ -1950,7 +1951,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
Lex(); Lex();
} }
if (MAI.hasSubsectionsViaSymbols() && CFIStartProcLoc && Sym->isExternal()) if (MAI.hasSubsectionsViaSymbols() && CFIStartProcLoc &&
Sym->isExternal() && !cast<MCSymbolMachO>(Sym)->isAltEntry())
return Error(StartTokLoc, "non-private labels cannot appear between " return Error(StartTokLoc, "non-private labels cannot appear between "
".cfi_startproc / .cfi_endproc pairs") && ".cfi_startproc / .cfi_endproc pairs") &&
Error(*CFIStartProcLoc, "previous .cfi_startproc was here"); Error(*CFIStartProcLoc, "previous .cfi_startproc was here");
@@ -677,6 +677,13 @@ static bool isECObject(object::SymbolicFile &Obj) {
return false; return false;
} }
// Returns true if Name is one of the import-library bookkeeping symbols: an
// import descriptor, the null import descriptor, or a null thunk data symbol.
bool isImportDescriptor(StringRef Name) {
  if (Name.starts_with(ImportDescriptorPrefix))
    return true;
  if (Name == StringRef{NullImportDescriptorSymbolName})
    return true;
  // Null thunk symbols are recognised by both their prefix and their suffix.
  return Name.starts_with(NullThunkDataPrefix) &&
         Name.ends_with(NullThunkDataSuffix);
}
static Expected<std::vector<unsigned>> getSymbols(SymbolicFile *Obj, static Expected<std::vector<unsigned>> getSymbols(SymbolicFile *Obj,
uint16_t Index, uint16_t Index,
raw_ostream &SymNames, raw_ostream &SymNames,
@@ -704,6 +711,10 @@ static Expected<std::vector<unsigned>> getSymbols(SymbolicFile *Obj,
if (Map == &SymMap->Map) { if (Map == &SymMap->Map) {
Ret.push_back(SymNames.tell()); Ret.push_back(SymNames.tell());
SymNames << Name << '\0'; SymNames << Name << '\0';
// If EC is enabled, then the import descriptors are NOT put into EC
// objects so we need to copy them to the EC map manually.
if (SymMap->UseECMap && isImportDescriptor(Name))
SymMap->ECMap[Name] = Index;
} }
} else { } else {
Ret.push_back(SymNames.tell()); Ret.push_back(SymNames.tell());
@@ -52,6 +52,38 @@ StringRef COFFImportFile::getFileFormatName() const {
} }
} }
// Returns the export name for this import, derived from the import header's
// name type and the NUL-separated strings that follow the header.
StringRef COFFImportFile::getExportName() const {
  const coff_import_header *header = getCOFFImportHeader();

  // Everything after the fixed-size header; the first string is the symbol.
  const StringRef payload = Data.getBuffer().substr(sizeof(*header));
  const StringRef symbol = payload.split('\0').first;

  // Drop at most one leading '?', '@' or '_'.
  auto dropLeadingDecoration = [](StringRef s) {
    if (!s.empty() && StringRef("?@_").contains(s[0]))
      return s.substr(1);
    return s;
  };

  switch (header->getNameType()) {
  case IMPORT_ORDINAL:
    return "";
  case IMPORT_NAME_NOPREFIX:
    return dropLeadingDecoration(symbol);
  case IMPORT_NAME_UNDECORATE: {
    const StringRef trimmed = dropLeadingDecoration(symbol);
    return trimmed.substr(0, trimmed.find('@'));
  }
  case IMPORT_NAME_EXPORTAS: {
    // Step past the symbol name and its NUL, skip the DLL name, and take the
    // string that follows it.
    const StringRef afterSymbol = payload.substr(symbol.size() + 1);
    return afterSymbol.split('\0').second.split('\0').first;
  }
  default:
    return symbol;
  }
}
static uint16_t getImgRelRelocation(MachineTypes Machine) { static uint16_t getImgRelRelocation(MachineTypes Machine) {
switch (Machine) { switch (Machine) {
default: default:
@@ -76,7 +108,7 @@ template <class T> static void append(std::vector<uint8_t> &B, const T &Data) {
} }
static void writeStringTable(std::vector<uint8_t> &B, static void writeStringTable(std::vector<uint8_t> &B,
ArrayRef<const std::string> Strings) { ArrayRef<const std::string_view> Strings) {
// The COFF string table consists of a 4-byte value which is the size of the // The COFF string table consists of a 4-byte value which is the size of the
// table, including the length field itself. This value is followed by the // table, including the length field itself. This value is followed by the
// string content itself, which is an array of null-terminated C-style // string content itself, which is an array of null-terminated C-style
@@ -139,9 +171,6 @@ static Expected<std::string> replace(StringRef S, StringRef From,
return (Twine(S.substr(0, Pos)) + To + S.substr(Pos + From.size())).str(); return (Twine(S.substr(0, Pos)) + To + S.substr(Pos + From.size())).str();
} }
static const std::string NullImportDescriptorSymbolName =
"__NULL_IMPORT_DESCRIPTOR";
namespace { namespace {
// This class constructs various small object files necessary to support linking // This class constructs various small object files necessary to support linking
// symbols imported from a DLL. The contents are pretty strictly defined and // symbols imported from a DLL. The contents are pretty strictly defined and
@@ -160,8 +189,9 @@ class ObjectFactory {
public: public:
ObjectFactory(StringRef S, MachineTypes M) ObjectFactory(StringRef S, MachineTypes M)
: NativeMachine(M), ImportName(S), Library(llvm::sys::path::stem(S)), : NativeMachine(M), ImportName(S), Library(llvm::sys::path::stem(S)),
ImportDescriptorSymbolName(("__IMPORT_DESCRIPTOR_" + Library).str()), ImportDescriptorSymbolName((ImportDescriptorPrefix + Library).str()),
NullThunkSymbolName(("\x7f" + Library + "_NULL_THUNK_DATA").str()) {} NullThunkSymbolName(
(NullThunkDataPrefix + Library + NullThunkDataSuffix).str()) {}
// Creates an Import Descriptor. This is a small object file which contains a // Creates an Import Descriptor. This is a small object file which contains a
// reference to the terminators and contains the library name (entry) for the // reference to the terminators and contains the library name (entry) for the
@@ -183,6 +213,7 @@ class ObjectFactory {
// Library Format. // Library Format.
NewArchiveMember createShortImport(StringRef Sym, uint16_t Ordinal, NewArchiveMember createShortImport(StringRef Sym, uint16_t Ordinal,
ImportType Type, ImportNameType NameType, ImportType Type, ImportNameType NameType,
StringRef ExportName,
MachineTypes Machine); MachineTypes Machine);
// Create a weak external file which is described in PE/COFF Aux Format 3. // Create a weak external file which is described in PE/COFF Aux Format 3.
@@ -474,12 +505,13 @@ NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) {
return {MemoryBufferRef{F, ImportName}}; return {MemoryBufferRef{F, ImportName}};
} }
NewArchiveMember ObjectFactory::createShortImport(StringRef Sym, NewArchiveMember
uint16_t Ordinal, ObjectFactory::createShortImport(StringRef Sym, uint16_t Ordinal,
ImportType ImportType, ImportType ImportType, ImportNameType NameType,
ImportNameType NameType, StringRef ExportName, MachineTypes Machine) {
MachineTypes Machine) {
size_t ImpSize = ImportName.size() + Sym.size() + 2; // +2 for NULs size_t ImpSize = ImportName.size() + Sym.size() + 2; // +2 for NULs
if (!ExportName.empty())
ImpSize += ExportName.size() + 1;
size_t Size = sizeof(coff_import_header) + ImpSize; size_t Size = sizeof(coff_import_header) + ImpSize;
char *Buf = Alloc.Allocate<char>(Size); char *Buf = Alloc.Allocate<char>(Size);
memset(Buf, 0, Size); memset(Buf, 0, Size);
@@ -499,6 +531,10 @@ NewArchiveMember ObjectFactory::createShortImport(StringRef Sym,
memcpy(P, Sym.data(), Sym.size()); memcpy(P, Sym.data(), Sym.size());
P += Sym.size() + 1; P += Sym.size() + 1;
memcpy(P, ImportName.data(), ImportName.size()); memcpy(P, ImportName.data(), ImportName.size());
if (!ExportName.empty()) {
P += ImportName.size() + 1;
memcpy(P, ExportName.data(), ExportName.size());
}
return {MemoryBufferRef(StringRef(Buf, Size), ImportName)}; return {MemoryBufferRef(StringRef(Buf, Size), ImportName)};
} }
@@ -615,27 +651,51 @@ Error writeImportLibrary(StringRef ImportName, StringRef Path,
ImportType = IMPORT_CONST; ImportType = IMPORT_CONST;
StringRef SymbolName = E.SymbolName.empty() ? E.Name : E.SymbolName; StringRef SymbolName = E.SymbolName.empty() ? E.Name : E.SymbolName;
ImportNameType NameType = E.Noname std::string Name;
? IMPORT_ORDINAL
: getNameType(SymbolName, E.Name,
Machine, MinGW);
Expected<std::string> Name = E.ExtName.empty()
? std::string(SymbolName)
: replace(SymbolName, E.Name, E.ExtName);
if (!Name) if (E.ExtName.empty()) {
return Name.takeError(); Name = std::string(SymbolName);
} else {
Expected<std::string> ReplacedName =
replace(SymbolName, E.Name, E.ExtName);
if (!ReplacedName)
return ReplacedName.takeError();
Name.swap(*ReplacedName);
}
if (!E.AliasTarget.empty() && *Name != E.AliasTarget) { if (!E.AliasTarget.empty() && Name != E.AliasTarget) {
Members.push_back( Members.push_back(
OF.createWeakExternal(E.AliasTarget, *Name, false, Machine)); OF.createWeakExternal(E.AliasTarget, Name, false, Machine));
Members.push_back( Members.push_back(
OF.createWeakExternal(E.AliasTarget, *Name, true, Machine)); OF.createWeakExternal(E.AliasTarget, Name, true, Machine));
continue; continue;
} }
Members.push_back( ImportNameType NameType;
OF.createShortImport(*Name, E.Ordinal, ImportType, NameType, Machine)); std::string ExportName;
if (E.Noname) {
NameType = IMPORT_ORDINAL;
} else {
NameType = getNameType(SymbolName, E.Name, Machine, MinGW);
}
// On ARM64EC, use EXPORTAS to import demangled name for mangled symbols.
if (ImportType == IMPORT_CODE && isArm64EC(Machine)) {
if (std::optional<std::string> MangledName =
getArm64ECMangledFunctionName(Name)) {
if (ExportName.empty()) {
NameType = IMPORT_NAME_EXPORTAS;
ExportName.swap(Name);
}
Name = std::move(*MangledName);
} else if (ExportName.empty()) {
NameType = IMPORT_NAME_EXPORTAS;
ExportName = std::move(*getArm64ECDemangledFunctionName(Name));
}
}
Members.push_back(OF.createShortImport(Name, E.Ordinal, ImportType,
NameType, ExportName, Machine));
} }
return writeArchive(Path, Members, SymtabWritingMode::NormalSymtab, return writeArchive(Path, Members, SymtabWritingMode::NormalSymtab,
@@ -94,6 +94,9 @@ void formatted_raw_ostream::UpdatePosition(const char *Ptr, size_t Size) {
/// ComputePosition - Examine the current output and update line and column /// ComputePosition - Examine the current output and update line and column
/// counts. /// counts.
void formatted_raw_ostream::ComputePosition(const char *Ptr, size_t Size) { void formatted_raw_ostream::ComputePosition(const char *Ptr, size_t Size) {
if (DisableScan)
return;
// If our previous scan pointer is inside the buffer, assume we already // If our previous scan pointer is inside the buffer, assume we already
// scanned those bytes. This depends on raw_ostream to not change our buffer // scanned those bytes. This depends on raw_ostream to not change our buffer
// in unexpected ways. // in unexpected ways.
@@ -24,11 +24,13 @@
#include "llvm/IR/IRBuilder.h" #include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h" #include "llvm/IR/Instruction.h"
#include "llvm/InitializePasses.h" #include "llvm/InitializePasses.h"
#include "llvm/Object/COFF.h"
#include "llvm/Pass.h" #include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h" #include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/Triple.h" #include "llvm/TargetParser/Triple.h"
using namespace llvm; using namespace llvm;
using namespace llvm::object;
using OperandBundleDef = OperandBundleDefT<Value *>; using OperandBundleDef = OperandBundleDefT<Value *>;
@@ -43,6 +45,8 @@ static cl::opt<bool> GenerateThunks("arm64ec-generate-thunks", cl::Hidden,
namespace { namespace {
enum class ThunkType { GuestExit, Entry, Exit };
class AArch64Arm64ECCallLowering : public ModulePass { class AArch64Arm64ECCallLowering : public ModulePass {
public: public:
static char ID; static char ID;
@@ -69,14 +73,14 @@ class AArch64Arm64ECCallLowering : public ModulePass {
Type *I64Ty; Type *I64Ty;
Type *VoidTy; Type *VoidTy;
void getThunkType(FunctionType *FT, AttributeList AttrList, bool EntryThunk, void getThunkType(FunctionType *FT, AttributeList AttrList, ThunkType TT,
raw_ostream &Out, FunctionType *&Arm64Ty, raw_ostream &Out, FunctionType *&Arm64Ty,
FunctionType *&X64Ty); FunctionType *&X64Ty);
void getThunkRetType(FunctionType *FT, AttributeList AttrList, void getThunkRetType(FunctionType *FT, AttributeList AttrList,
raw_ostream &Out, Type *&Arm64RetTy, Type *&X64RetTy, raw_ostream &Out, Type *&Arm64RetTy, Type *&X64RetTy,
SmallVectorImpl<Type *> &Arm64ArgTypes, SmallVectorImpl<Type *> &Arm64ArgTypes,
SmallVectorImpl<Type *> &X64ArgTypes, bool &HasSretPtr); SmallVectorImpl<Type *> &X64ArgTypes, bool &HasSretPtr);
void getThunkArgTypes(FunctionType *FT, AttributeList AttrList, void getThunkArgTypes(FunctionType *FT, AttributeList AttrList, ThunkType TT,
raw_ostream &Out, raw_ostream &Out,
SmallVectorImpl<Type *> &Arm64ArgTypes, SmallVectorImpl<Type *> &Arm64ArgTypes,
SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr); SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr);
@@ -89,10 +93,11 @@ class AArch64Arm64ECCallLowering : public ModulePass {
void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT, void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
AttributeList AttrList, AttributeList AttrList,
bool EntryThunk, raw_ostream &Out, ThunkType TT, raw_ostream &Out,
FunctionType *&Arm64Ty, FunctionType *&Arm64Ty,
FunctionType *&X64Ty) { FunctionType *&X64Ty) {
Out << (EntryThunk ? "$ientry_thunk$cdecl$" : "$iexit_thunk$cdecl$"); Out << (TT == ThunkType::Entry ? "$ientry_thunk$cdecl$"
: "$iexit_thunk$cdecl$");
Type *Arm64RetTy; Type *Arm64RetTy;
Type *X64RetTy; Type *X64RetTy;
@@ -102,8 +107,8 @@ void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
// The first argument to a thunk is the called function, stored in x9. // The first argument to a thunk is the called function, stored in x9.
// For exit thunks, we pass the called function down to the emulator; // For exit thunks, we pass the called function down to the emulator;
// for entry thunks, we just call the Arm64 function directly. // for entry/guest exit thunks, we just call the Arm64 function directly.
if (!EntryThunk) if (TT == ThunkType::Exit)
Arm64ArgTypes.push_back(PtrTy); Arm64ArgTypes.push_back(PtrTy);
X64ArgTypes.push_back(PtrTy); X64ArgTypes.push_back(PtrTy);
@@ -111,14 +116,16 @@ void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
getThunkRetType(FT, AttrList, Out, Arm64RetTy, X64RetTy, Arm64ArgTypes, getThunkRetType(FT, AttrList, Out, Arm64RetTy, X64RetTy, Arm64ArgTypes,
X64ArgTypes, HasSretPtr); X64ArgTypes, HasSretPtr);
getThunkArgTypes(FT, AttrList, Out, Arm64ArgTypes, X64ArgTypes, HasSretPtr); getThunkArgTypes(FT, AttrList, TT, Out, Arm64ArgTypes, X64ArgTypes,
HasSretPtr);
Arm64Ty = FunctionType::get(Arm64RetTy, Arm64ArgTypes, false); Arm64Ty = FunctionType::get(Arm64RetTy, Arm64ArgTypes, false);
X64Ty = FunctionType::get(X64RetTy, X64ArgTypes, false); X64Ty = FunctionType::get(X64RetTy, X64ArgTypes, false);
} }
void AArch64Arm64ECCallLowering::getThunkArgTypes( void AArch64Arm64ECCallLowering::getThunkArgTypes(
FunctionType *FT, AttributeList AttrList, raw_ostream &Out, FunctionType *FT, AttributeList AttrList, ThunkType TT, raw_ostream &Out,
SmallVectorImpl<Type *> &Arm64ArgTypes, SmallVectorImpl<Type *> &Arm64ArgTypes,
SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr) { SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr) {
@@ -156,9 +163,11 @@ void AArch64Arm64ECCallLowering::getThunkArgTypes(
X64ArgTypes.push_back(PtrTy); X64ArgTypes.push_back(PtrTy);
// x5 // x5
Arm64ArgTypes.push_back(I64Ty); Arm64ArgTypes.push_back(I64Ty);
// FIXME: x5 isn't actually passed/used by the x64 side; revisit once we if (TT != ThunkType::Entry) {
// FIXME: x5 isn't actually used by the x64 side; revisit once we
// have proper isel for varargs // have proper isel for varargs
X64ArgTypes.push_back(I64Ty); X64ArgTypes.push_back(I64Ty);
}
return; return;
} }
@@ -339,8 +348,7 @@ Function *AArch64Arm64ECCallLowering::buildExitThunk(FunctionType *FT,
SmallString<256> ExitThunkName; SmallString<256> ExitThunkName;
llvm::raw_svector_ostream ExitThunkStream(ExitThunkName); llvm::raw_svector_ostream ExitThunkStream(ExitThunkName);
FunctionType *Arm64Ty, *X64Ty; FunctionType *Arm64Ty, *X64Ty;
getThunkType(FT, Attrs, /*EntryThunk*/ false, ExitThunkStream, Arm64Ty, getThunkType(FT, Attrs, ThunkType::Exit, ExitThunkStream, Arm64Ty, X64Ty);
X64Ty);
if (Function *F = M->getFunction(ExitThunkName)) if (Function *F = M->getFunction(ExitThunkName))
return F; return F;
@@ -443,7 +451,7 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
SmallString<256> EntryThunkName; SmallString<256> EntryThunkName;
llvm::raw_svector_ostream EntryThunkStream(EntryThunkName); llvm::raw_svector_ostream EntryThunkStream(EntryThunkName);
FunctionType *Arm64Ty, *X64Ty; FunctionType *Arm64Ty, *X64Ty;
getThunkType(F->getFunctionType(), F->getAttributes(), /*EntryThunk*/ true, getThunkType(F->getFunctionType(), F->getAttributes(), ThunkType::Entry,
EntryThunkStream, Arm64Ty, X64Ty); EntryThunkStream, Arm64Ty, X64Ty);
if (Function *F = M->getFunction(EntryThunkName)) if (Function *F = M->getFunction(EntryThunkName))
return F; return F;
@@ -465,10 +473,11 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
bool TransformDirectToSRet = X64RetType->isVoidTy() && !RetTy->isVoidTy(); bool TransformDirectToSRet = X64RetType->isVoidTy() && !RetTy->isVoidTy();
unsigned ThunkArgOffset = TransformDirectToSRet ? 2 : 1; unsigned ThunkArgOffset = TransformDirectToSRet ? 2 : 1;
unsigned PassthroughArgSize = F->isVarArg() ? 5 : Thunk->arg_size();
// Translate arguments to call. // Translate arguments to call.
SmallVector<Value *> Args; SmallVector<Value *> Args;
for (unsigned i = ThunkArgOffset, e = Thunk->arg_size(); i != e; ++i) { for (unsigned i = ThunkArgOffset, e = PassthroughArgSize; i != e; ++i) {
Value *Arg = Thunk->getArg(i); Value *Arg = Thunk->getArg(i);
Type *ArgTy = Arm64Ty->getParamType(i - ThunkArgOffset); Type *ArgTy = Arm64Ty->getParamType(i - ThunkArgOffset);
if (ArgTy->isArrayTy() || ArgTy->isStructTy() || if (ArgTy->isArrayTy() || ArgTy->isStructTy() ||
@@ -485,6 +494,22 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
Args.push_back(Arg); Args.push_back(Arg);
} }
if (F->isVarArg()) {
// The 5th argument to variadic entry thunks is used to model the x64 sp
// which is passed to the thunk in x4, this can be passed to the callee as
// the variadic argument start address after skipping over the 32 byte
// shadow store.
// The EC thunk CC will assign any argument marked as InReg to x4.
Thunk->addParamAttr(5, Attribute::InReg);
Value *Arg = Thunk->getArg(5);
Arg = IRB.CreatePtrAdd(Arg, IRB.getInt64(0x20));
Args.push_back(Arg);
// Pass in a zero variadic argument size (in x5).
Args.push_back(IRB.getInt64(0));
}
// Call the function passed to the thunk. // Call the function passed to the thunk.
Value *Callee = Thunk->getArg(0); Value *Callee = Thunk->getArg(0);
Callee = IRB.CreateBitCast(Callee, PtrTy); Callee = IRB.CreateBitCast(Callee, PtrTy);
@@ -518,7 +543,7 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) { Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) {
llvm::raw_null_ostream NullThunkName; llvm::raw_null_ostream NullThunkName;
FunctionType *Arm64Ty, *X64Ty; FunctionType *Arm64Ty, *X64Ty;
getThunkType(F->getFunctionType(), F->getAttributes(), /*EntryThunk*/ true, getThunkType(F->getFunctionType(), F->getAttributes(), ThunkType::GuestExit,
NullThunkName, Arm64Ty, X64Ty); NullThunkName, Arm64Ty, X64Ty);
auto MangledName = getArm64ECMangledFunctionName(F->getName().str()); auto MangledName = getArm64ECMangledFunctionName(F->getName().str());
assert(MangledName && "Can't guest exit to function that's already native"); assert(MangledName && "Can't guest exit to function that's already native");
@@ -213,6 +213,9 @@ def CC_AArch64_Arm64EC_VarArg : CallingConv<[
// address is passed in X9. // address is passed in X9.
let Entry = 1 in let Entry = 1 in
def CC_AArch64_Arm64EC_Thunk : CallingConv<[ def CC_AArch64_Arm64EC_Thunk : CallingConv<[
// ARM64EC-specific: the InReg attribute can be used to access the x64 sp passed into entry thunks in x4 from the IR.
CCIfInReg<CCIfType<[i64], CCAssignToReg<[X4]>>>,
// Byval aggregates are passed by pointer // Byval aggregates are passed by pointer
CCIfByVal<CCPassIndirect<i64>>, CCIfByVal<CCPassIndirect<i64>>,
@@ -8007,11 +8007,19 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
} }
if (IsVarArg && Subtarget->isWindowsArm64EC()) { if (IsVarArg && Subtarget->isWindowsArm64EC()) {
SDValue ParamPtr = StackPtr;
if (IsTailCall) {
// Create a dummy object at the top of the stack that can be used to get
// the SP after the epilogue
int FI = MF.getFrameInfo().CreateFixedObject(1, FPDiff, true);
ParamPtr = DAG.getFrameIndex(FI, PtrVT);
}
// For vararg calls, the Arm64EC ABI requires values in x4 and x5 // For vararg calls, the Arm64EC ABI requires values in x4 and x5
// describing the argument list. x4 contains the address of the // describing the argument list. x4 contains the address of the
// first stack parameter. x5 contains the size in bytes of all parameters // first stack parameter. x5 contains the size in bytes of all parameters
// passed on the stack. // passed on the stack.
RegsToPass.emplace_back(AArch64::X4, StackPtr); RegsToPass.emplace_back(AArch64::X4, ParamPtr);
RegsToPass.emplace_back(AArch64::X5, RegsToPass.emplace_back(AArch64::X5,
DAG.getConstant(NumBytes, DL, MVT::i64)); DAG.getConstant(NumBytes, DL, MVT::i64));
} }
@@ -23,11 +23,13 @@
#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h" #include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCStreamer.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/CodeGen.h" #include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h" #include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetMachine.h"
using namespace llvm; using namespace llvm;
using namespace llvm::object;
extern cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration; extern cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration;
@@ -248,34 +248,6 @@ static inline bool atomicBarrierDroppedOnZero(unsigned Opcode) {
return false; return false;
} }
static inline std::optional<std::string>
getArm64ECMangledFunctionName(std::string Name) {
bool IsCppFn = Name[0] == '?';
if (IsCppFn && Name.find("$$h") != std::string::npos)
return std::nullopt;
if (!IsCppFn && Name[0] == '#')
return std::nullopt;
StringRef Prefix = "$$h";
size_t InsertIdx = 0;
if (IsCppFn) {
InsertIdx = Name.find("@@");
size_t ThreeAtSignsIdx = Name.find("@@@");
if (InsertIdx != std::string::npos && InsertIdx != ThreeAtSignsIdx) {
InsertIdx += 2;
} else {
InsertIdx = Name.find("@");
if (InsertIdx != std::string::npos)
InsertIdx++;
}
} else {
Prefix = "#";
}
Name.insert(Name.begin() + InsertIdx, Prefix.begin(), Prefix.end());
return std::optional<std::string>(Name);
}
namespace AArch64CC { namespace AArch64CC {
// The CondCodes constants map directly to the 4-bit encoding of the condition // The CondCodes constants map directly to the 4-bit encoding of the condition
@@ -2781,10 +2781,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
AFI->setLRIsSpilled(SavedRegs.test(ARM::LR)); AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
} }
void ARMFrameLowering::processFunctionBeforeFrameFinalized( void ARMFrameLowering::updateLRRestored(MachineFunction &MF) {
MachineFunction &MF, RegScavenger *RS) const {
TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS);
MachineFrameInfo &MFI = MF.getFrameInfo(); MachineFrameInfo &MFI = MF.getFrameInfo();
if (!MFI.isCalleeSavedInfoValid()) if (!MFI.isCalleeSavedInfoValid())
return; return;
@@ -2808,6 +2805,12 @@ void ARMFrameLowering::processFunctionBeforeFrameFinalized(
} }
} }
void ARMFrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS) const {
TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS);
updateLRRestored(MF);
}
void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF, void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
BitVector &SavedRegs) const { BitVector &SavedRegs) const {
TargetFrameLowering::getCalleeSaves(MF, SavedRegs); TargetFrameLowering::getCalleeSaves(MF, SavedRegs);
@@ -59,6 +59,10 @@ class ARMFrameLowering : public TargetFrameLowering {
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override; RegScavenger *RS) const override;
/// Update the IsRestored flag on LR if it is spilled, based on the return
/// instructions.
static void updateLRRestored(MachineFunction &MF);
void processFunctionBeforeFrameFinalized( void processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS = nullptr) const override; MachineFunction &MF, RegScavenger *RS = nullptr) const override;
@@ -2062,17 +2062,6 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
MO.setReg(ARM::PC); MO.setReg(ARM::PC);
PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI); PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
MBB.erase(MBBI); MBB.erase(MBBI);
// We now restore LR into PC so it is not live-out of the return block
// anymore: Clear the CSI Restored bit.
MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo();
// CSI should be fixed after PrologEpilog Insertion
assert(MFI.isCalleeSavedInfoValid() && "CSI should be valid");
for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
if (Info.getReg() == ARM::LR) {
Info.setRestored(false);
break;
}
}
return true; return true;
} }
} }
@@ -2120,14 +2109,22 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
isThumb2 = AFI->isThumb2Function(); isThumb2 = AFI->isThumb2Function();
isThumb1 = AFI->isThumbFunction() && !isThumb2; isThumb1 = AFI->isThumbFunction() && !isThumb2;
bool Modified = false; bool Modified = false, ModifiedLDMReturn = false;
for (MachineBasicBlock &MBB : Fn) { for (MachineBasicBlock &MBB : Fn) {
Modified |= LoadStoreMultipleOpti(MBB); Modified |= LoadStoreMultipleOpti(MBB);
if (STI->hasV5TOps() && !AFI->shouldSignReturnAddress()) if (STI->hasV5TOps() && !AFI->shouldSignReturnAddress())
Modified |= MergeReturnIntoLDM(MBB); ModifiedLDMReturn |= MergeReturnIntoLDM(MBB);
if (isThumb1) if (isThumb1)
Modified |= CombineMovBx(MBB); Modified |= CombineMovBx(MBB);
} }
Modified |= ModifiedLDMReturn;
// If we merged a BX instruction into an LDM, we need to re-calculate whether
// LR is restored. This check needs to consider the whole function, not just
// the instruction(s) we changed, because there may be other BX returns which
// still need LR to be restored.
if (ModifiedLDMReturn)
ARMFrameLowering::updateLRRestored(Fn);
Allocator.DestroyAll(); Allocator.DestroyAll();
return Modified; return Modified;
@@ -1398,7 +1398,7 @@ let mayLoad = 1, hasSideEffects = 0,
// Load indirect with displacement operations. // Load indirect with displacement operations.
let canFoldAsLoad = 1, isReMaterializable = 1 in { let canFoldAsLoad = 1, isReMaterializable = 1 in {
let Constraints = "@earlyclobber $reg" in def LDDRdPtrQ def LDDRdPtrQ
: FSTDLDD<0, : FSTDLDD<0,
(outs GPR8 (outs GPR8
: $reg), : $reg),
@@ -2343,7 +2343,9 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode( return DAG.getNode(
LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy), DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
: (MaskIdx0 + MaskLen0 - 1),
DL, GRLenVT),
DAG.getConstant(MaskIdx0, DL, GRLenVT)); DAG.getConstant(MaskIdx0, DL, GRLenVT));
} }
@@ -4940,3 +4942,8 @@ bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
return !isa<ConstantSDNode>(Y); return !isa<ConstantSDNode>(Y);
} }
ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
// TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension.
return ISD::SIGN_EXTEND;
}
@@ -206,6 +206,8 @@ class LoongArchTargetLowering : public TargetLowering {
return ISD::SIGN_EXTEND; return ISD::SIGN_EXTEND;
} }
ISD::NodeType getExtendForAtomicCmpSwapArg() const override;
Register getRegisterByName(const char *RegName, LLT VT, Register getRegisterByName(const char *RegName, LLT VT,
const MachineFunction &MF) const override; const MachineFunction &MF) const override;
bool mayBeEmittedAsTailCall(const CallInst *CI) const override; bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
@@ -45,6 +45,11 @@ class LoongArchTargetMachine : public LLVMTargetMachine {
MachineFunctionInfo * MachineFunctionInfo *
createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F,
const TargetSubtargetInfo *STI) const override; const TargetSubtargetInfo *STI) const override;
// Addrspacecasts are always noops.
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
return true;
}
}; };
} // end namespace llvm } // end namespace llvm
@@ -1255,7 +1255,9 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
emitRRI(Mips::SD, GPReg, Mips::SP, RegOrOffset, SMLoc(), &STI); emitRRI(Mips::SD, GPReg, Mips::SP, RegOrOffset, SMLoc(), &STI);
} }
if (getABI().IsN32()) { #if 0
// We haven't support -mabicalls -mno-shared yet.
if (-mno-shared) {
MCSymbol *GPSym = MCA.getContext().getOrCreateSymbol("__gnu_local_gp"); MCSymbol *GPSym = MCA.getContext().getOrCreateSymbol("__gnu_local_gp");
const MipsMCExpr *HiExpr = MipsMCExpr::create( const MipsMCExpr *HiExpr = MipsMCExpr::create(
MipsMCExpr::MEK_HI, MCSymbolRefExpr::create(GPSym, MCA.getContext()), MipsMCExpr::MEK_HI, MCSymbolRefExpr::create(GPSym, MCA.getContext()),
@@ -1273,6 +1275,7 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
return; return;
} }
#endif
const MipsMCExpr *HiExpr = MipsMCExpr::createGpOff( const MipsMCExpr *HiExpr = MipsMCExpr::createGpOff(
MipsMCExpr::MEK_HI, MCSymbolRefExpr::create(&Sym, MCA.getContext()), MipsMCExpr::MEK_HI, MCSymbolRefExpr::create(&Sym, MCA.getContext()),
@@ -1288,7 +1291,10 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
emitRRX(Mips::ADDiu, GPReg, GPReg, MCOperand::createExpr(LoExpr), SMLoc(), emitRRX(Mips::ADDiu, GPReg, GPReg, MCOperand::createExpr(LoExpr), SMLoc(),
&STI); &STI);
// daddu $gp, $gp, $funcreg // (d)addu $gp, $gp, $funcreg
if (getABI().IsN32())
emitRRR(Mips::ADDu, GPReg, GPReg, RegNo, SMLoc(), &STI);
else
emitRRR(Mips::DADDu, GPReg, GPReg, RegNo, SMLoc(), &STI); emitRRR(Mips::DADDu, GPReg, GPReg, RegNo, SMLoc(), &STI);
} }
@@ -388,18 +388,32 @@ bool MipsExpandPseudo::expandAtomicBinOpSubword(
Opcode = Mips::XOR; Opcode = Mips::XOR;
break; break;
case Mips::ATOMIC_LOAD_UMIN_I8_POSTRA: case Mips::ATOMIC_LOAD_UMIN_I8_POSTRA:
IsUnsigned = true;
IsMin = true;
break;
case Mips::ATOMIC_LOAD_UMIN_I16_POSTRA: case Mips::ATOMIC_LOAD_UMIN_I16_POSTRA:
IsUnsigned = true; IsUnsigned = true;
[[fallthrough]]; IsMin = true;
break;
case Mips::ATOMIC_LOAD_MIN_I8_POSTRA: case Mips::ATOMIC_LOAD_MIN_I8_POSTRA:
SEOp = Mips::SEB;
IsMin = true;
break;
case Mips::ATOMIC_LOAD_MIN_I16_POSTRA: case Mips::ATOMIC_LOAD_MIN_I16_POSTRA:
IsMin = true; IsMin = true;
break; break;
case Mips::ATOMIC_LOAD_UMAX_I8_POSTRA: case Mips::ATOMIC_LOAD_UMAX_I8_POSTRA:
IsUnsigned = true;
IsMax = true;
break;
case Mips::ATOMIC_LOAD_UMAX_I16_POSTRA: case Mips::ATOMIC_LOAD_UMAX_I16_POSTRA:
IsUnsigned = true; IsUnsigned = true;
[[fallthrough]]; IsMax = true;
break;
case Mips::ATOMIC_LOAD_MAX_I8_POSTRA: case Mips::ATOMIC_LOAD_MAX_I8_POSTRA:
SEOp = Mips::SEB;
IsMax = true;
break;
case Mips::ATOMIC_LOAD_MAX_I16_POSTRA: case Mips::ATOMIC_LOAD_MAX_I16_POSTRA:
IsMax = true; IsMax = true;
break; break;
@@ -461,14 +475,42 @@ bool MipsExpandPseudo::expandAtomicBinOpSubword(
// For little endian we need to clear uninterested bits. // For little endian we need to clear uninterested bits.
if (STI->isLittle()) { if (STI->isLittle()) {
if (!IsUnsigned) {
BuildMI(loopMBB, DL, TII->get(Mips::SRAV), OldVal)
.addReg(OldVal)
.addReg(ShiftAmnt);
BuildMI(loopMBB, DL, TII->get(Mips::SRAV), Incr)
.addReg(Incr)
.addReg(ShiftAmnt);
if (STI->hasMips32r2()) {
BuildMI(loopMBB, DL, TII->get(SEOp), OldVal).addReg(OldVal);
BuildMI(loopMBB, DL, TII->get(SEOp), Incr).addReg(Incr);
} else {
const unsigned ShiftImm = SEOp == Mips::SEH ? 16 : 24;
BuildMI(loopMBB, DL, TII->get(Mips::SLL), OldVal)
.addReg(OldVal, RegState::Kill)
.addImm(ShiftImm);
BuildMI(loopMBB, DL, TII->get(Mips::SRA), OldVal)
.addReg(OldVal, RegState::Kill)
.addImm(ShiftImm);
BuildMI(loopMBB, DL, TII->get(Mips::SLL), Incr)
.addReg(Incr, RegState::Kill)
.addImm(ShiftImm);
BuildMI(loopMBB, DL, TII->get(Mips::SRA), Incr)
.addReg(Incr, RegState::Kill)
.addImm(ShiftImm);
}
} else {
// and OldVal, OldVal, Mask // and OldVal, OldVal, Mask
// and Incr, Incr, Mask // and Incr, Incr, Mask
BuildMI(loopMBB, DL, TII->get(Mips::AND), OldVal) BuildMI(loopMBB, DL, TII->get(Mips::AND), OldVal)
.addReg(OldVal) .addReg(OldVal)
.addReg(Mask); .addReg(Mask);
BuildMI(loopMBB, DL, TII->get(Mips::AND), Incr).addReg(Incr).addReg(Mask); BuildMI(loopMBB, DL, TII->get(Mips::AND), Incr)
.addReg(Incr)
.addReg(Mask);
}
} }
// unsigned: sltu Scratch4, oldVal, Incr // unsigned: sltu Scratch4, oldVal, Incr
// signed: slt Scratch4, oldVal, Incr // signed: slt Scratch4, oldVal, Incr
BuildMI(loopMBB, DL, TII->get(SLTScratch4), Scratch4) BuildMI(loopMBB, DL, TII->get(SLTScratch4), Scratch4)
@@ -1191,12 +1191,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
continue; continue;
// For SVR4, don't emit a move for the CR spill slot if we haven't
// spilled CRs.
if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
&& !MustSaveCR)
continue;
// For 64-bit SVR4 when we have spilled CRs, the spill location // For 64-bit SVR4 when we have spilled CRs, the spill location
// is SP+8, not a frame-relative slot. // is SP+8, not a frame-relative slot.
if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
@@ -14942,6 +14942,7 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl, SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
DAG.getVTList(MVT::f64, MVT::Other), DAG.getVTList(MVT::f64, MVT::Other),
Ops, MVT::i8, LDN->getMemOperand()); Ops, MVT::i8, LDN->getMemOperand());
DAG.makeEquivalentMemoryOrdering(LDN, Ld);
// For signed conversion, we need to sign-extend the value in the VSR // For signed conversion, we need to sign-extend the value in the VSR
if (Signed) { if (Signed) {
@@ -3192,7 +3192,8 @@ static std::optional<uint64_t> getExactInteger(const APFloat &APF,
// Note that this method will also match potentially unappealing index // Note that this method will also match potentially unappealing index
// sequences, like <i32 0, i32 50939494>, however it is left to the caller to // sequences, like <i32 0, i32 50939494>, however it is left to the caller to
// determine whether this is worth generating code for. // determine whether this is worth generating code for.
static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) { static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
unsigned EltSizeInBits) {
unsigned NumElts = Op.getNumOperands(); unsigned NumElts = Op.getNumOperands();
assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR"); assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
bool IsInteger = Op.getValueType().isInteger(); bool IsInteger = Op.getValueType().isInteger();
@@ -3200,7 +3201,7 @@ static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
std::optional<unsigned> SeqStepDenom; std::optional<unsigned> SeqStepDenom;
std::optional<int64_t> SeqStepNum, SeqAddend; std::optional<int64_t> SeqStepNum, SeqAddend;
std::optional<std::pair<uint64_t, unsigned>> PrevElt; std::optional<std::pair<uint64_t, unsigned>> PrevElt;
unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits(); assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
for (unsigned Idx = 0; Idx < NumElts; Idx++) { for (unsigned Idx = 0; Idx < NumElts; Idx++) {
// Assume undef elements match the sequence; we just have to be careful // Assume undef elements match the sequence; we just have to be careful
// when interpolating across them. // when interpolating across them.
@@ -3213,14 +3214,14 @@ static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
if (!isa<ConstantSDNode>(Op.getOperand(Idx))) if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
return std::nullopt; return std::nullopt;
Val = Op.getConstantOperandVal(Idx) & Val = Op.getConstantOperandVal(Idx) &
maskTrailingOnes<uint64_t>(EltSizeInBits); maskTrailingOnes<uint64_t>(Op.getScalarValueSizeInBits());
} else { } else {
// The BUILD_VECTOR must be all constants. // The BUILD_VECTOR must be all constants.
if (!isa<ConstantFPSDNode>(Op.getOperand(Idx))) if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
return std::nullopt; return std::nullopt;
if (auto ExactInteger = getExactInteger( if (auto ExactInteger = getExactInteger(
cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(), cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
EltSizeInBits)) Op.getScalarValueSizeInBits()))
Val = *ExactInteger; Val = *ExactInteger;
else else
return std::nullopt; return std::nullopt;
@@ -3276,11 +3277,11 @@ static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
uint64_t Val; uint64_t Val;
if (IsInteger) { if (IsInteger) {
Val = Op.getConstantOperandVal(Idx) & Val = Op.getConstantOperandVal(Idx) &
maskTrailingOnes<uint64_t>(EltSizeInBits); maskTrailingOnes<uint64_t>(Op.getScalarValueSizeInBits());
} else { } else {
Val = *getExactInteger( Val = *getExactInteger(
cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(), cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
EltSizeInBits); Op.getScalarValueSizeInBits());
} }
uint64_t ExpectedVal = uint64_t ExpectedVal =
(int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom; (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
@@ -3550,7 +3551,7 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
// Try and match index sequences, which we can lower to the vid instruction // Try and match index sequences, which we can lower to the vid instruction
// with optional modifications. An all-undef vector is matched by // with optional modifications. An all-undef vector is matched by
// getSplatValue, above. // getSplatValue, above.
if (auto SimpleVID = isSimpleVIDSequence(Op)) { if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
int64_t StepNumerator = SimpleVID->StepNumerator; int64_t StepNumerator = SimpleVID->StepNumerator;
unsigned StepDenominator = SimpleVID->StepDenominator; unsigned StepDenominator = SimpleVID->StepDenominator;
int64_t Addend = SimpleVID->Addend; int64_t Addend = SimpleVID->Addend;
@@ -15562,7 +15563,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
if (Index.getOpcode() == ISD::BUILD_VECTOR && if (Index.getOpcode() == ISD::BUILD_VECTOR &&
MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) { MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
if (std::optional<VIDSequence> SimpleVID = isSimpleVIDSequence(Index); // The sequence will be XLenVT, not the type of Index. Tell
// isSimpleVIDSequence this so we avoid overflow.
if (std::optional<VIDSequence> SimpleVID =
isSimpleVIDSequence(Index, Subtarget.getXLen());
SimpleVID && SimpleVID->StepDenominator == 1) { SimpleVID && SimpleVID->StepDenominator == 1) {
const int64_t StepNumerator = SimpleVID->StepNumerator; const int64_t StepNumerator = SimpleVID->StepNumerator;
const int64_t Addend = SimpleVID->Addend; const int64_t Addend = SimpleVID->Addend;
@@ -37,6 +37,9 @@ static cl::opt<unsigned> SLPMaxVF(
InstructionCost InstructionCost
RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT, RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
TTI::TargetCostKind CostKind) { TTI::TargetCostKind CostKind) {
// Check if the type is valid for all CostKind
if (!VT.isVector())
return InstructionCost::getInvalid();
size_t NumInstr = OpCodes.size(); size_t NumInstr = OpCodes.size();
if (CostKind == TTI::TCK_CodeSize) if (CostKind == TTI::TCK_CodeSize)
return NumInstr; return NumInstr;
@@ -4252,6 +4252,7 @@ SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
if (N->getValueType(0) == MVT::i128) { if (N->getValueType(0) == MVT::i128) {
unsigned BaseOp = 0; unsigned BaseOp = 0;
unsigned FlagOp = 0; unsigned FlagOp = 0;
bool IsBorrow = false;
switch (Op.getOpcode()) { switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown instruction!"); default: llvm_unreachable("Unknown instruction!");
case ISD::UADDO: case ISD::UADDO:
@@ -4261,6 +4262,7 @@ SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
case ISD::USUBO: case ISD::USUBO:
BaseOp = ISD::SUB; BaseOp = ISD::SUB;
FlagOp = SystemZISD::VSCBI; FlagOp = SystemZISD::VSCBI;
IsBorrow = true;
break; break;
} }
SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS); SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
@@ -4268,6 +4270,9 @@ SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag, Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
DAG.getValueType(MVT::i1)); DAG.getValueType(MVT::i1));
Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1)); Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
if (IsBorrow)
Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
Flag, DAG.getConstant(1, DL, Flag.getValueType()));
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag); return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
} }
@@ -4340,6 +4345,7 @@ SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
if (VT == MVT::i128) { if (VT == MVT::i128) {
unsigned BaseOp = 0; unsigned BaseOp = 0;
unsigned FlagOp = 0; unsigned FlagOp = 0;
bool IsBorrow = false;
switch (Op.getOpcode()) { switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown instruction!"); default: llvm_unreachable("Unknown instruction!");
case ISD::UADDO_CARRY: case ISD::UADDO_CARRY:
@@ -4349,14 +4355,21 @@ SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
case ISD::USUBO_CARRY: case ISD::USUBO_CARRY:
BaseOp = SystemZISD::VSBI; BaseOp = SystemZISD::VSBI;
FlagOp = SystemZISD::VSBCBI; FlagOp = SystemZISD::VSBCBI;
IsBorrow = true;
break; break;
} }
if (IsBorrow)
Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
Carry, DAG.getConstant(1, DL, Carry.getValueType()));
Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128); Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry); SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry); SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag, Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
DAG.getValueType(MVT::i1)); DAG.getValueType(MVT::i1));
Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1)); Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
if (IsBorrow)
Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
Flag, DAG.getConstant(1, DL, Flag.getValueType()));
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag); return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
} }
@@ -6611,6 +6624,27 @@ SDValue SystemZTargetLowering::combineZERO_EXTEND(
return NewSelect; return NewSelect;
} }
} }
// Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
// of the result is smaller than the size of X and all the truncated bits
// of X are already zero.
if (N0.getOpcode() == ISD::XOR &&
N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
SDValue X = N0.getOperand(0).getOperand(0);
if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
KnownBits Known = DAG.computeKnownBits(X);
APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
N0.getValueSizeInBits(),
VT.getSizeInBits());
if (TruncatedBits.isSubsetOf(Known.Zero)) {
X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
X, DAG.getConstant(Mask, SDLoc(N0), VT));
}
}
}
return SDValue(); return SDValue();
} }
@@ -47878,6 +47878,7 @@ static SDValue combineAndShuffleNot(SDNode *N, SelectionDAG &DAG,
SDValue X, Y; SDValue X, Y;
SDValue N0 = N->getOperand(0); SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1); SDValue N1 = N->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (SDValue Not = GetNot(N0)) { if (SDValue Not = GetNot(N0)) {
X = Not; X = Not;
@@ -47891,9 +47892,11 @@ static SDValue combineAndShuffleNot(SDNode *N, SelectionDAG &DAG,
X = DAG.getBitcast(VT, X); X = DAG.getBitcast(VT, X);
Y = DAG.getBitcast(VT, Y); Y = DAG.getBitcast(VT, Y);
SDLoc DL(N); SDLoc DL(N);
// We do not split for SSE at all, but we need to split vectors for AVX1 and // We do not split for SSE at all, but we need to split vectors for AVX1 and
// AVX2. // AVX2.
if (!Subtarget.useAVX512Regs() && VT.is512BitVector()) { if (!Subtarget.useAVX512Regs() && VT.is512BitVector() &&
TLI.isTypeLegal(VT.getHalfNumVectorElementsVT(*DAG.getContext()))) {
SDValue LoX, HiX; SDValue LoX, HiX;
std::tie(LoX, HiX) = splitVector(X, DAG, DL); std::tie(LoX, HiX) = splitVector(X, DAG, DL);
SDValue LoY, HiY; SDValue LoY, HiY;
@@ -47903,7 +47906,11 @@ static SDValue combineAndShuffleNot(SDNode *N, SelectionDAG &DAG,
SDValue HiV = DAG.getNode(X86ISD::ANDNP, DL, SplitVT, {HiX, HiY}); SDValue HiV = DAG.getNode(X86ISD::ANDNP, DL, SplitVT, {HiX, HiY});
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, {LoV, HiV}); return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, {LoV, HiV});
} }
if (TLI.isTypeLegal(VT))
return DAG.getNode(X86ISD::ANDNP, DL, VT, {X, Y}); return DAG.getNode(X86ISD::ANDNP, DL, VT, {X, Y});
return SDValue();
} }
// Try to widen AND, OR and XOR nodes to VT in order to remove casts around // Try to widen AND, OR and XOR nodes to VT in order to remove casts around
@@ -83,6 +83,7 @@ defm : subvector_subreg_lowering<VR128, v2f64, VR256, v4f64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16, VR256, v16i16, sub_xmm>; defm : subvector_subreg_lowering<VR128, v8i16, VR256, v16i16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8, VR256, v32i8, sub_xmm>; defm : subvector_subreg_lowering<VR128, v16i8, VR256, v32i8, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8f16, VR256, v16f16, sub_xmm>; defm : subvector_subreg_lowering<VR128, v8f16, VR256, v16f16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8bf16, VR256, v16bf16, sub_xmm>;
// A 128-bit subvector extract from the first 512-bit vector position is a // A 128-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector // subregister copy that needs no instruction. Likewise, a 128-bit subvector
@@ -95,6 +96,7 @@ defm : subvector_subreg_lowering<VR128, v2f64, VR512, v8f64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16, VR512, v32i16, sub_xmm>; defm : subvector_subreg_lowering<VR128, v8i16, VR512, v32i16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8, VR512, v64i8, sub_xmm>; defm : subvector_subreg_lowering<VR128, v16i8, VR512, v64i8, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8f16, VR512, v32f16, sub_xmm>; defm : subvector_subreg_lowering<VR128, v8f16, VR512, v32f16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8bf16, VR512, v32bf16, sub_xmm>;
// A 128-bit subvector extract from the first 512-bit vector position is a // A 128-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector // subregister copy that needs no instruction. Likewise, a 128-bit subvector
@@ -107,6 +109,7 @@ defm : subvector_subreg_lowering<VR256, v4f64, VR512, v8f64, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v16i16, VR512, v32i16, sub_ymm>; defm : subvector_subreg_lowering<VR256, v16i16, VR512, v32i16, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v32i8, VR512, v64i8, sub_ymm>; defm : subvector_subreg_lowering<VR256, v32i8, VR512, v64i8, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v16f16, VR512, v32f16, sub_ymm>; defm : subvector_subreg_lowering<VR256, v16f16, VR512, v32f16, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v16bf16, VR512, v32bf16, sub_ymm>;
// If we're inserting into an all zeros vector, just use a plain move which // If we're inserting into an all zeros vector, just use a plain move which
@@ -6080,6 +6080,10 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
for (const Instruction &I : instructions(Callee)) { for (const Instruction &I : instructions(Callee)) {
if (const auto *CB = dyn_cast<CallBase>(&I)) { if (const auto *CB = dyn_cast<CallBase>(&I)) {
// Having more target features is fine for inline ASM.
if (CB->isInlineAsm())
continue;
SmallVector<Type *, 8> Types; SmallVector<Type *, 8> Types;
for (Value *Arg : CB->args()) for (Value *Arg : CB->args())
Types.push_back(Arg->getType()); Types.push_back(Arg->getType());
@@ -652,10 +652,6 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
// check to see if the pointer is guaranteed to not be modified from entry of // check to see if the pointer is guaranteed to not be modified from entry of
// the function to each of the load instructions. // the function to each of the load instructions.
// Because there could be several/many load instructions, remember which
// blocks we know to be transparent to the load.
df_iterator_default_set<BasicBlock *, 16> TranspBlocks;
for (LoadInst *Load : Loads) { for (LoadInst *Load : Loads) {
// Check to see if the load is invalidated from the start of the block to // Check to see if the load is invalidated from the start of the block to
// the load itself. // the load itself.
@@ -669,7 +665,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
// To do this, we perform a depth first search on the inverse CFG from the // To do this, we perform a depth first search on the inverse CFG from the
// loading block. // loading block.
for (BasicBlock *P : predecessors(BB)) { for (BasicBlock *P : predecessors(BB)) {
for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks)) for (BasicBlock *TranspBB : inverse_depth_first(P))
if (AAR.canBasicBlockModify(*TranspBB, Loc)) if (AAR.canBasicBlockModify(*TranspBB, Loc))
return false; return false;
} }
@@ -412,12 +412,15 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) { if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
// scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) { if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue(); if (maskContainsAllOneOrUndef(ConstMask)) {
StoreInst *S = Align Alignment =
new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false, Alignment); cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false,
Alignment);
S->copyMetadata(II); S->copyMetadata(II);
return S; return S;
} }
}
// scatter(vector, splat(ptr), splat(true)) -> store extract(vector, // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
// lastlane), ptr // lastlane), ptr
if (ConstMask->isAllOnesValue()) { if (ConstMask->isAllOnesValue()) {
@@ -2156,14 +2156,14 @@ static bool collectInsertionElements(Value *V, unsigned Shift,
Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize); Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
for (unsigned i = 0; i != NumElts; ++i) { for (unsigned i = 0; i != NumElts; ++i) {
unsigned ShiftI = Shift + i * ElementSize; unsigned ShiftI = i * ElementSize;
Constant *Piece = ConstantFoldBinaryInstruction( Constant *Piece = ConstantFoldBinaryInstruction(
Instruction::LShr, C, ConstantInt::get(C->getType(), ShiftI)); Instruction::LShr, C, ConstantInt::get(C->getType(), ShiftI));
if (!Piece) if (!Piece)
return false; return false;
Piece = ConstantExpr::getTrunc(Piece, ElementIntTy); Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy, if (!collectInsertionElements(Piece, ShiftI + Shift, Elements, VecEltTy,
isBigEndian)) isBigEndian))
return false; return false;
} }
@@ -6491,6 +6491,13 @@ InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred,
if (!SafeReplacementConstant) if (!SafeReplacementConstant)
SafeReplacementConstant = CI; SafeReplacementConstant = CI;
} }
} else if (isa<VectorType>(C->getType())) {
// Handle scalable splat
Value *SplatC = C->getSplatValue();
auto *CI = dyn_cast_or_null<ConstantInt>(SplatC);
// Bail out if the constant can't be safely incremented/decremented.
if (!CI || !ConstantIsOk(CI))
return std::nullopt;
} else { } else {
// ConstantExpr? // ConstantExpr?
return std::nullopt; return std::nullopt;
@@ -1284,6 +1284,10 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
isGuaranteedNotToBeUndefOrPoison(CmpRHS, SQ.AC, &Sel, &DT)) { isGuaranteedNotToBeUndefOrPoison(CmpRHS, SQ.AC, &Sel, &DT)) {
if (Value *V = simplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, SQ, if (Value *V = simplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, SQ,
/* AllowRefinement */ true)) /* AllowRefinement */ true))
// Require either the replacement or the simplification result to be a
// constant to avoid infinite loops.
// FIXME: Make this check more precise.
if (isa<Constant>(CmpRHS) || isa<Constant>(V))
return replaceOperand(Sel, Swapped ? 2 : 1, V); return replaceOperand(Sel, Swapped ? 2 : 1, V);
// Even if TrueVal does not simplify, we can directly replace a use of // Even if TrueVal does not simplify, we can directly replace a use of
@@ -1302,6 +1306,7 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
isGuaranteedNotToBeUndefOrPoison(CmpLHS, SQ.AC, &Sel, &DT)) isGuaranteedNotToBeUndefOrPoison(CmpLHS, SQ.AC, &Sel, &DT))
if (Value *V = simplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, SQ, if (Value *V = simplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, SQ,
/* AllowRefinement */ true)) /* AllowRefinement */ true))
if (isa<Constant>(CmpLHS) || isa<Constant>(V))
return replaceOperand(Sel, Swapped ? 2 : 1, V); return replaceOperand(Sel, Swapped ? 2 : 1, V);
auto *FalseInst = dyn_cast<Instruction>(FalseVal); auto *FalseInst = dyn_cast<Instruction>(FalseVal);
@@ -1455,6 +1455,7 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
Value *NewOp, InstCombiner &IC) { Value *NewOp, InstCombiner &IC) {
Instruction *Clone = I.clone(); Instruction *Clone = I.clone();
Clone->replaceUsesOfWith(SI, NewOp); Clone->replaceUsesOfWith(SI, NewOp);
Clone->dropUBImplyingAttrsAndMetadata();
IC.InsertNewInstBefore(Clone, SI->getIterator()); IC.InsertNewInstBefore(Clone, SI->getIterator());
return Clone; return Clone;
} }
@@ -752,11 +752,12 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
const unsigned ByteSize = 1U << Idx; const unsigned ByteSize = 1U << Idx;
const unsigned BitSize = ByteSize * 8; const unsigned BitSize = ByteSize * 8;
Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
Value *Args[] = {Addr, Value *Val = RMWI->getValOperand();
IRB.CreateIntCast(RMWI->getValOperand(), Ty, false), Value *Args[] = {Addr, IRB.CreateBitOrPointerCast(Val, Ty),
createOrdering(&IRB, RMWI->getOrdering())}; createOrdering(&IRB, RMWI->getOrdering())};
CallInst *C = CallInst::Create(F, Args); Value *C = IRB.CreateCall(F, Args);
ReplaceInstWithInst(I, C); I->replaceAllUsesWith(IRB.CreateBitOrPointerCast(C, Val->getType()));
I->eraseFromParent();
} else if (AtomicCmpXchgInst *CASI = dyn_cast<AtomicCmpXchgInst>(I)) { } else if (AtomicCmpXchgInst *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
Value *Addr = CASI->getPointerOperand(); Value *Addr = CASI->getPointerOperand();
Type *OrigOldValTy = CASI->getNewValOperand()->getType(); Type *OrigOldValTy = CASI->getNewValOperand()->getType();
@@ -857,6 +857,9 @@ struct DSEState {
// no longer be captured. // no longer be captured.
bool ShouldIterateEndOfFunctionDSE; bool ShouldIterateEndOfFunctionDSE;
/// Dead instructions to be removed at the end of DSE.
SmallVector<Instruction *> ToRemove;
// Class contains self-reference, make sure it's not copied/moved. // Class contains self-reference, make sure it's not copied/moved.
DSEState(const DSEState &) = delete; DSEState(const DSEState &) = delete;
DSEState &operator=(const DSEState &) = delete; DSEState &operator=(const DSEState &) = delete;
@@ -1692,7 +1695,8 @@ struct DSEState {
return {MaybeDeadAccess}; return {MaybeDeadAccess};
} }
// Delete dead memory defs /// Delete dead memory defs and recursively add their operands to ToRemove if
/// they became dead.
void deleteDeadInstruction(Instruction *SI) { void deleteDeadInstruction(Instruction *SI) {
MemorySSAUpdater Updater(&MSSA); MemorySSAUpdater Updater(&MSSA);
SmallVector<Instruction *, 32> NowDeadInsts; SmallVector<Instruction *, 32> NowDeadInsts;
@@ -1708,8 +1712,11 @@ struct DSEState {
salvageKnowledge(DeadInst); salvageKnowledge(DeadInst);
// Remove the Instruction from MSSA. // Remove the Instruction from MSSA.
if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) { MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) { bool IsMemDef = MA && isa<MemoryDef>(MA);
if (MA) {
if (IsMemDef) {
auto *MD = cast<MemoryDef>(MA);
SkipStores.insert(MD); SkipStores.insert(MD);
if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) { if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
if (SI->getValueOperand()->getType()->isPointerTy()) { if (SI->getValueOperand()->getType()->isPointerTy()) {
@@ -1730,13 +1737,21 @@ struct DSEState {
// Remove its operands // Remove its operands
for (Use &O : DeadInst->operands()) for (Use &O : DeadInst->operands())
if (Instruction *OpI = dyn_cast<Instruction>(O)) { if (Instruction *OpI = dyn_cast<Instruction>(O)) {
O = nullptr; O.set(PoisonValue::get(O->getType()));
if (isInstructionTriviallyDead(OpI, &TLI)) if (isInstructionTriviallyDead(OpI, &TLI))
NowDeadInsts.push_back(OpI); NowDeadInsts.push_back(OpI);
} }
EI.removeInstruction(DeadInst); EI.removeInstruction(DeadInst);
// Remove memory defs directly if they don't produce results, but only
// queue other dead instructions for later removal. They may have been
// used as memory locations that have been cached by BatchAA. Removing
// them here may lead to newly created instructions to be allocated at the
// same address, yielding stale cache entries.
if (IsMemDef && DeadInst->getType()->isVoidTy())
DeadInst->eraseFromParent(); DeadInst->eraseFromParent();
else
ToRemove.push_back(DeadInst);
} }
} }
@@ -2233,6 +2248,12 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
MadeChange |= State.eliminateRedundantStoresOfExistingValues(); MadeChange |= State.eliminateRedundantStoresOfExistingValues();
MadeChange |= State.eliminateDeadWritesAtEndOfFunction(); MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
while (!State.ToRemove.empty()) {
Instruction *DeadInst = State.ToRemove.pop_back_val();
DeadInst->eraseFromParent();
}
return MadeChange; return MadeChange;
} }
} // end anonymous namespace } // end anonymous namespace
@@ -2257,6 +2257,41 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
return nullptr; return nullptr;
} }
/// Offer additional vector type candidates derived from \p OtherTys to
/// \p CheckCandidateType, then defer to checkVectorTypesForPromotion.
///
/// Every type in \p OtherTys that is a valid vector element type is paired
/// with every vector type in \p CandidateTysCopy. When the type's size in bits
/// differs from both the vector's total size and the vector's element size,
/// and evenly divides the vector's total size, a vector of that type with
/// (vector size / type size) elements is passed to \p CheckCandidateType.
///
/// \returns the result of checkVectorTypesForPromotion on the candidate state
/// passed in by reference.
static VectorType *createAndCheckVectorTypesForPromotion(
    SetVector<Type *> &OtherTys, ArrayRef<VectorType *> CandidateTysCopy,
    function_ref<void(Type *)> CheckCandidateType, Partition &P,
    const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys,
    bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
    bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy) {
  // Remember the first entry of the snapshot so the assertion in the inner
  // loop can check that the snapshot stays unchanged while it is walked.
  [[maybe_unused]] VectorType *FirstCandidate =
      CandidateTysCopy.empty() ? nullptr : CandidateTysCopy.front();

  // Consider additional vector types where the element type size is a
  // multiple of load/store element size.
  for (Type *EltTy : OtherTys) {
    if (VectorType::isValidElementType(EltTy)) {
      unsigned EltTyBits = DL.getTypeSizeInBits(EltTy).getFixedValue();
      // Walk the caller-provided snapshot rather than CandidateTys itself,
      // because CandidateTys might be appended to inside this loop.
      for (VectorType *const CandidateVTy : CandidateTysCopy) {
        // The elements in the snapshot should remain invariant throughout.
        assert(CandidateTysCopy[0] == FirstCandidate && "Different Element");
        unsigned VecBits = DL.getTypeSizeInBits(CandidateVTy).getFixedValue();
        unsigned VecEltBits =
            DL.getTypeSizeInBits(CandidateVTy->getElementType())
                .getFixedValue();
        // Nothing new to offer if the type already matches the whole vector
        // or its current element type.
        if (EltTyBits == VecBits || EltTyBits == VecEltBits)
          continue;
        // The vector must split into a whole number of such elements.
        if (VecBits % EltTyBits != 0)
          continue;
        CheckCandidateType(VectorType::get(EltTy, VecBits / EltTyBits, false));
      }
    }
  }

  return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
                                      CommonEltTy, HaveVecPtrTy,
                                      HaveCommonVecPtrTy, CommonVecPtrTy);
}
/// Test whether the given alloca partitioning and range of slices can be /// Test whether the given alloca partitioning and range of slices can be
/// promoted to a vector. /// promoted to a vector.
/// ///
@@ -2271,6 +2306,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
// we have different element types. // we have different element types.
SmallVector<VectorType *, 4> CandidateTys; SmallVector<VectorType *, 4> CandidateTys;
SetVector<Type *> LoadStoreTys; SetVector<Type *> LoadStoreTys;
SetVector<Type *> DeferredTys;
Type *CommonEltTy = nullptr; Type *CommonEltTy = nullptr;
VectorType *CommonVecPtrTy = nullptr; VectorType *CommonVecPtrTy = nullptr;
bool HaveVecPtrTy = false; bool HaveVecPtrTy = false;
@@ -2314,42 +2350,32 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
Ty = SI->getValueOperand()->getType(); Ty = SI->getValueOperand()->getType();
else else
continue; continue;
auto CandTy = Ty->getScalarType();
if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
S.endOffset() != P.endOffset())) {
DeferredTys.insert(Ty);
continue;
}
LoadStoreTys.insert(Ty); LoadStoreTys.insert(Ty);
// Consider any loads or stores that are the exact size of the slice. // Consider any loads or stores that are the exact size of the slice.
if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset()) if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
CheckCandidateType(Ty); CheckCandidateType(Ty);
} }
if (auto *VTy = checkVectorTypesForPromotion( SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, if (auto *VTy = createAndCheckVectorTypesForPromotion(
LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
HaveCommonVecPtrTy, CommonVecPtrTy)) HaveCommonVecPtrTy, CommonVecPtrTy))
return VTy; return VTy;
// Consider additional vector types where the element type size is a
// multiple of load/store element size.
for (Type *Ty : LoadStoreTys) {
if (!VectorType::isValidElementType(Ty))
continue;
unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
// Make a copy of CandidateTys and iterate through it, because we might
// append to CandidateTys in the loop.
SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
CandidateTys.clear(); CandidateTys.clear();
for (VectorType *&VTy : CandidateTysCopy) { return createAndCheckVectorTypesForPromotion(
unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue(); DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
unsigned ElementSize = HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue(); CommonVecPtrTy);
if (TypeSize != VectorSize && TypeSize != ElementSize &&
VectorSize % TypeSize == 0) {
VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
CheckCandidateType(NewVTy);
}
}
}
return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
CommonEltTy, HaveVecPtrTy,
HaveCommonVecPtrTy, CommonVecPtrTy);
} }
/// Test whether a slice of an alloca is valid for integer widening. /// Test whether a slice of an alloca is valid for integer widening.
@@ -547,11 +547,6 @@ int main(int argc, char **argv) {
std::unique_ptr<MCAsmBackend> MAB( std::unique_ptr<MCAsmBackend> MAB(
TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions)); TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions));
auto FOut = std::make_unique<formatted_raw_ostream>(*OS); auto FOut = std::make_unique<formatted_raw_ostream>(*OS);
// FIXME: Workaround for bug in formatted_raw_ostream. Color escape codes
// are (incorrectly) written directly to the unbuffered raw_ostream wrapped
// by the formatted_raw_ostream.
if (Action == AC_CDisassemble)
FOut->SetUnbuffered();
Str.reset( Str.reset(
TheTarget->createAsmStreamer(Ctx, std::move(FOut), /*asmverbose*/ true, TheTarget->createAsmStreamer(Ctx, std::move(FOut), /*asmverbose*/ true,
/*useDwarfDirectory*/ true, IP, /*useDwarfDirectory*/ true, IP,
@@ -2032,13 +2032,6 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
formatted_raw_ostream FOS(outs()); formatted_raw_ostream FOS(outs());
// FIXME: Workaround for bug in formatted_raw_ostream. Color escape codes
// are (incorrectly) written directly to the unbuffered raw_ostream
// wrapped by the formatted_raw_ostream.
if (DisassemblyColor == ColorOutput::Enable ||
DisassemblyColor == ColorOutput::Auto)
FOS.SetUnbuffered();
std::unordered_map<uint64_t, std::string> AllLabels; std::unordered_map<uint64_t, std::string> AllLabels;
std::unordered_map<uint64_t, std::vector<BBAddrMapLabel>> BBAddrMapLabels; std::unordered_map<uint64_t, std::vector<BBAddrMapLabel>> BBAddrMapLabels;
if (SymbolizeOperands) { if (SymbolizeOperands) {
@@ -45,8 +45,14 @@ void dumpCOFFImportFile(const COFFImportFile *File, ScopedPrinter &Writer) {
case COFF::IMPORT_NAME_UNDECORATE: case COFF::IMPORT_NAME_UNDECORATE:
Writer.printString("Name type", "undecorate"); Writer.printString("Name type", "undecorate");
break; break;
case COFF::IMPORT_NAME_EXPORTAS:
Writer.printString("Name type", "export as");
break;
} }
if (H->getNameType() != COFF::IMPORT_ORDINAL)
Writer.printString("Export name", File->getExportName());
for (const object::BasicSymbolRef &Sym : File->symbols()) { for (const object::BasicSymbolRef &Sym : File->symbols()) {
raw_ostream &OS = Writer.startLine(); raw_ostream &OS = Writer.startLine();
OS << "Symbol: "; OS << "Symbol: ";
@@ -152,8 +152,7 @@ void MacroFusionPredicatorEmitter::emitFirstPredicate(Record *Predicate,
<< "if (FirstDest.isVirtual() && !MRI.hasOneNonDBGUse(FirstDest))\n"; << "if (FirstDest.isVirtual() && !MRI.hasOneNonDBGUse(FirstDest))\n";
OS.indent(4) << " return false;\n"; OS.indent(4) << " return false;\n";
OS.indent(2) << "}\n"; OS.indent(2) << "}\n";
} else if (Predicate->isSubClassOf( } else if (Predicate->isSubClassOf("FusionPredicateWithMCInstPredicate")) {
"FirstFusionPredicateWithMCInstPredicate")) {
OS.indent(2) << "{\n"; OS.indent(2) << "{\n";
OS.indent(4) << "const MachineInstr *MI = FirstMI;\n"; OS.indent(4) << "const MachineInstr *MI = FirstMI;\n";
OS.indent(4) << "if ("; OS.indent(4) << "if (";
@@ -173,7 +172,7 @@ void MacroFusionPredicatorEmitter::emitFirstPredicate(Record *Predicate,
void MacroFusionPredicatorEmitter::emitSecondPredicate(Record *Predicate, void MacroFusionPredicatorEmitter::emitSecondPredicate(Record *Predicate,
PredicateExpander &PE, PredicateExpander &PE,
raw_ostream &OS) { raw_ostream &OS) {
if (Predicate->isSubClassOf("SecondFusionPredicateWithMCInstPredicate")) { if (Predicate->isSubClassOf("FusionPredicateWithMCInstPredicate")) {
OS.indent(2) << "{\n"; OS.indent(2) << "{\n";
OS.indent(4) << "const MachineInstr *MI = &SecondMI;\n"; OS.indent(4) << "const MachineInstr *MI = &SecondMI;\n";
OS.indent(4) << "if ("; OS.indent(4) << "if (";
@@ -185,7 +184,7 @@ void MacroFusionPredicatorEmitter::emitSecondPredicate(Record *Predicate,
OS.indent(2) << "}\n"; OS.indent(2) << "}\n";
} else { } else {
PrintFatalError(Predicate->getLoc(), PrintFatalError(Predicate->getLoc(),
"Unsupported predicate for first instruction: " + "Unsupported predicate for second instruction: " +
Predicate->getType()->getAsString()); Predicate->getType()->getAsString());
} }
} }
@@ -196,9 +195,8 @@ void MacroFusionPredicatorEmitter::emitBothPredicate(Record *Predicate,
if (Predicate->isSubClassOf("FusionPredicateWithCode")) if (Predicate->isSubClassOf("FusionPredicateWithCode"))
OS << Predicate->getValueAsString("Predicate"); OS << Predicate->getValueAsString("Predicate");
else if (Predicate->isSubClassOf("BothFusionPredicateWithMCInstPredicate")) { else if (Predicate->isSubClassOf("BothFusionPredicateWithMCInstPredicate")) {
Record *MCPred = Predicate->getValueAsDef("Predicate"); emitFirstPredicate(Predicate, PE, OS);
emitFirstPredicate(MCPred, PE, OS); emitSecondPredicate(Predicate, PE, OS);
emitSecondPredicate(MCPred, PE, OS);
} else if (Predicate->isSubClassOf("TieReg")) { } else if (Predicate->isSubClassOf("TieReg")) {
int FirstOpIdx = Predicate->getValueAsInt("FirstOpIdx"); int FirstOpIdx = Predicate->getValueAsInt("FirstOpIdx");
int SecondOpIdx = Predicate->getValueAsInt("SecondOpIdx"); int SecondOpIdx = Predicate->getValueAsInt("SecondOpIdx");
@@ -818,6 +818,7 @@ class KMPAffinity {
typedef KMPAffinity::Mask kmp_affin_mask_t; typedef KMPAffinity::Mask kmp_affin_mask_t;
extern KMPAffinity *__kmp_affinity_dispatch; extern KMPAffinity *__kmp_affinity_dispatch;
#ifndef KMP_OS_AIX
class kmp_affinity_raii_t { class kmp_affinity_raii_t {
kmp_affin_mask_t *mask; kmp_affin_mask_t *mask;
bool restored; bool restored;
@@ -842,6 +843,7 @@ class kmp_affinity_raii_t {
} }
~kmp_affinity_raii_t() { restore(); } ~kmp_affinity_raii_t() { restore(); }
}; };
#endif // !KMP_OS_AIX
// Declare local char buffers with this size for printing debug and info // Declare local char buffers with this size for printing debug and info
// messages, using __kmp_affinity_print_mask(). // messages, using __kmp_affinity_print_mask().
@@ -2506,7 +2508,7 @@ typedef struct kmp_depend_info {
union { union {
kmp_uint8 flag; // flag as an unsigned char kmp_uint8 flag; // flag as an unsigned char
struct { // flag as a set of 8 bits struct { // flag as a set of 8 bits
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
/* Same fields as in the #else branch, but in reverse order */ /* Same fields as in the #else branch, but in reverse order */
unsigned all : 1; unsigned all : 1;
unsigned unused : 3; unsigned unused : 3;
@@ -2671,7 +2673,7 @@ typedef struct kmp_task_stack {
#endif // BUILD_TIED_TASK_STACK #endif // BUILD_TIED_TASK_STACK
typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
/* Same fields as in the #else branch, but in reverse order */ /* Same fields as in the #else branch, but in reverse order */
#if OMPX_TASKGRAPH #if OMPX_TASKGRAPH
unsigned reserved31 : 6; unsigned reserved31 : 6;
@@ -3911,7 +3913,7 @@ extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
#if KMP_WEIGHTED_ITERATIONS_SUPPORTED #if KMP_WEIGHTED_ITERATIONS_SUPPORTED
extern int __kmp_get_first_osid_with_ecore(void); extern int __kmp_get_first_osid_with_ecore(void);
#endif #endif
#if KMP_OS_LINUX || KMP_OS_FREEBSD #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_AIX
extern int kmp_set_thread_affinity_mask_initial(void); extern int kmp_set_thread_affinity_mask_initial(void);
#endif #endif
static inline void __kmp_assign_root_init_mask() { static inline void __kmp_assign_root_init_mask() {
@@ -2906,12 +2906,17 @@ static inline const char *__kmp_cpuinfo_get_envvar() {
} }
// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the // Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
// affinity map. // affinity map. On AIX, the map is obtained through system SRAD (Scheduler
// Resource Allocation Domain).
static bool __kmp_affinity_create_cpuinfo_map(int *line, static bool __kmp_affinity_create_cpuinfo_map(int *line,
kmp_i18n_id_t *const msg_id) { kmp_i18n_id_t *const msg_id) {
*msg_id = kmp_i18n_null;
#if KMP_OS_AIX
unsigned num_records = __kmp_xproc;
#else
const char *filename = __kmp_cpuinfo_get_filename(); const char *filename = __kmp_cpuinfo_get_filename();
const char *envvar = __kmp_cpuinfo_get_envvar(); const char *envvar = __kmp_cpuinfo_get_envvar();
*msg_id = kmp_i18n_null;
if (__kmp_affinity.flags.verbose) { if (__kmp_affinity.flags.verbose) {
KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename); KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
@@ -2970,6 +2975,7 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line,
*msg_id = kmp_i18n_str_CantRewindCpuinfo; *msg_id = kmp_i18n_str_CantRewindCpuinfo;
return false; return false;
} }
#endif // KMP_OS_AIX
// Allocate the array of records to store the proc info in. The dummy // Allocate the array of records to store the proc info in. The dummy
// element at the end makes the logic in filling them out easier to code. // element at the end makes the logic in filling them out easier to code.
@@ -2999,6 +3005,99 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line,
INIT_PROC_INFO(threadInfo[i]); INIT_PROC_INFO(threadInfo[i]);
} }
#if KMP_OS_AIX
int smt_threads;
lpar_info_format1_t cpuinfo;
unsigned num_avail = __kmp_xproc;
if (__kmp_affinity.flags.verbose)
KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "system info for topology");
// Get the number of SMT threads per core.
int retval =
lpar_get_info(LPAR_INFO_FORMAT1, &cpuinfo, sizeof(lpar_info_format1_t));
if (!retval)
smt_threads = cpuinfo.smt_threads;
else {
CLEANUP_THREAD_INFO;
*msg_id = kmp_i18n_str_UnknownTopology;
return false;
}
// Allocate a resource set containing available system resources.
rsethandle_t sys_rset = rs_alloc(RS_SYSTEM);
if (sys_rset == NULL) {
CLEANUP_THREAD_INFO;
*msg_id = kmp_i18n_str_UnknownTopology;
return false;
}
// Allocate a resource set for the SRAD info.
rsethandle_t srad = rs_alloc(RS_EMPTY);
if (srad == NULL) {
rs_free(sys_rset);
CLEANUP_THREAD_INFO;
*msg_id = kmp_i18n_str_UnknownTopology;
return false;
}
// Get the SRAD system detail level.
int sradsdl = rs_getinfo(NULL, R_SRADSDL, 0);
if (sradsdl < 0) {
rs_free(sys_rset);
rs_free(srad);
CLEANUP_THREAD_INFO;
*msg_id = kmp_i18n_str_UnknownTopology;
return false;
}
// Get the number of RADs at that SRAD SDL.
int num_rads = rs_numrads(sys_rset, sradsdl, 0);
if (num_rads < 0) {
rs_free(sys_rset);
rs_free(srad);
CLEANUP_THREAD_INFO;
*msg_id = kmp_i18n_str_UnknownTopology;
return false;
}
// Get the maximum number of procs that may be contained in a resource set.
int max_procs = rs_getinfo(NULL, R_MAXPROCS, 0);
if (max_procs < 0) {
rs_free(sys_rset);
rs_free(srad);
CLEANUP_THREAD_INFO;
*msg_id = kmp_i18n_str_UnknownTopology;
return false;
}
int cur_rad = 0;
int num_set = 0;
for (int srad_idx = 0; cur_rad < num_rads && srad_idx < VMI_MAXRADS;
++srad_idx) {
// Check if the SRAD is available in the RSET.
if (rs_getrad(sys_rset, srad, sradsdl, srad_idx, 0) < 0)
continue;
for (int cpu = 0; cpu < max_procs; cpu++) {
// Set the info for the cpu if it is in the SRAD.
if (rs_op(RS_TESTRESOURCE, srad, NULL, R_PROCS, cpu)) {
threadInfo[cpu][osIdIndex] = cpu;
threadInfo[cpu][pkgIdIndex] = cur_rad;
threadInfo[cpu][coreIdIndex] = cpu / smt_threads;
++num_set;
if (num_set >= num_avail) {
// Done if all available CPUs have been set.
break;
}
}
}
++cur_rad;
}
rs_free(sys_rset);
rs_free(srad);
// The topology is already sorted.
#else // !KMP_OS_AIX
unsigned num_avail = 0; unsigned num_avail = 0;
*line = 0; *line = 0;
#if KMP_ARCH_S390X #if KMP_ARCH_S390X
@@ -3246,6 +3345,8 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line,
qsort(threadInfo, num_avail, sizeof(*threadInfo), qsort(threadInfo, num_avail, sizeof(*threadInfo),
__kmp_affinity_cmp_ProcCpuInfo_phys_id); __kmp_affinity_cmp_ProcCpuInfo_phys_id);
#endif // KMP_OS_AIX
// The table is now sorted by pkgId / coreId / threadId, but we really don't // The table is now sorted by pkgId / coreId / threadId, but we really don't
// know the radix of any of the fields. pkgId's may be sparsely assigned among // know the radix of any of the fields. pkgId's may be sparsely assigned among
// the chips on a system. Although coreId's are usually assigned // the chips on a system. Although coreId's are usually assigned
@@ -4441,7 +4542,7 @@ static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) {
} }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
#if KMP_OS_LINUX #if KMP_OS_LINUX || KMP_OS_AIX
if (!success) { if (!success) {
int line = 0; int line = 0;
success = __kmp_affinity_create_cpuinfo_map(&line, &msg_id); success = __kmp_affinity_create_cpuinfo_map(&line, &msg_id);
@@ -4837,7 +4938,12 @@ void __kmp_affinity_uninitialize(void) {
} }
if (__kmp_affin_origMask != NULL) { if (__kmp_affin_origMask != NULL) {
if (KMP_AFFINITY_CAPABLE()) { if (KMP_AFFINITY_CAPABLE()) {
#if KMP_OS_AIX
// Uninitialize by unbinding the thread.
bindprocessor(BINDTHREAD, thread_self(), PROCESSOR_CLASS_ANY);
#else
__kmp_set_system_affinity(__kmp_affin_origMask, FALSE); __kmp_set_system_affinity(__kmp_affin_origMask, FALSE);
#endif
} }
KMP_CPU_FREE(__kmp_affin_origMask); KMP_CPU_FREE(__kmp_affin_origMask);
__kmp_affin_origMask = NULL; __kmp_affin_origMask = NULL;
@@ -5011,7 +5117,10 @@ void __kmp_affinity_bind_init_mask(int gtid) {
__kmp_set_system_affinity(th->th.th_affin_mask, FALSE); __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
} else } else
#endif #endif
#ifndef KMP_OS_AIX
// Do not set the full mask as the init mask on AIX.
__kmp_set_system_affinity(th->th.th_affin_mask, TRUE); __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
#endif
} }
void __kmp_affinity_bind_place(int gtid) { void __kmp_affinity_bind_place(int gtid) {
@@ -5124,7 +5233,7 @@ int __kmp_aux_set_affinity(void **mask) {
int __kmp_aux_get_affinity(void **mask) { int __kmp_aux_get_affinity(void **mask) {
int gtid; int gtid;
int retval; int retval;
#if KMP_OS_WINDOWS || KMP_DEBUG #if KMP_OS_WINDOWS || KMP_OS_AIX || KMP_DEBUG
kmp_info_t *th; kmp_info_t *th;
#endif #endif
if (!KMP_AFFINITY_CAPABLE()) { if (!KMP_AFFINITY_CAPABLE()) {
@@ -5132,7 +5241,7 @@ int __kmp_aux_get_affinity(void **mask) {
} }
gtid = __kmp_entry_gtid(); gtid = __kmp_entry_gtid();
#if KMP_OS_WINDOWS || KMP_DEBUG #if KMP_OS_WINDOWS || KMP_OS_AIX || KMP_DEBUG
th = __kmp_threads[gtid]; th = __kmp_threads[gtid];
#else #else
(void)gtid; // unused variable (void)gtid; // unused variable
@@ -5155,7 +5264,7 @@ int __kmp_aux_get_affinity(void **mask) {
} }
} }
#if !KMP_OS_WINDOWS #if !KMP_OS_WINDOWS && !KMP_OS_AIX
retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE); retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
KA_TRACE( KA_TRACE(
@@ -5175,7 +5284,7 @@ int __kmp_aux_get_affinity(void **mask) {
KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask); KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
return 0; return 0;
#endif /* KMP_OS_WINDOWS */ #endif /* !KMP_OS_WINDOWS && !KMP_OS_AIX */
} }
int __kmp_aux_get_affinity_max_proc() { int __kmp_aux_get_affinity_max_proc() {
@@ -5557,7 +5666,7 @@ void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
} }
} }
#if KMP_OS_LINUX || KMP_OS_FREEBSD #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_AIX
// We don't need this entry for Windows because // We don't need this entry for Windows because
// there is GetProcessAffinityMask() api // there is GetProcessAffinityMask() api
// //
@@ -5592,7 +5701,11 @@ extern "C"
"set full mask for thread %d\n", "set full mask for thread %d\n",
gtid)); gtid));
KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL); KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
#if KMP_OS_AIX
return bindprocessor(BINDTHREAD, thread_self(), PROCESSOR_CLASS_ANY);
#else
return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE); return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
#endif
} }
#endif #endif
@@ -191,7 +191,7 @@ class KMPHwlocAffinity : public KMPAffinity {
}; };
#endif /* KMP_USE_HWLOC */ #endif /* KMP_USE_HWLOC */
#if KMP_OS_LINUX || KMP_OS_FREEBSD #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_AIX
#if KMP_OS_LINUX #if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present /* On some of the older OS's that we build on, these constants aren't present
in <asm/unistd.h> #included from <sys.syscall.h>. They must be the same on in <asm/unistd.h> #included from <sys.syscall.h>. They must be the same on
@@ -314,6 +314,10 @@ class KMPHwlocAffinity : public KMPAffinity {
#elif KMP_OS_FREEBSD #elif KMP_OS_FREEBSD
#include <pthread.h> #include <pthread.h>
#include <pthread_np.h> #include <pthread_np.h>
#elif KMP_OS_AIX
#include <sys/dr.h>
#include <sys/rset.h>
#define VMI_MAXRADS 64 // Maximum number of RADs allowed by AIX.
#endif #endif
class KMPNativeAffinity : public KMPAffinity { class KMPNativeAffinity : public KMPAffinity {
class Mask : public KMPAffinity::Mask { class Mask : public KMPAffinity::Mask {
@@ -401,6 +405,70 @@ class KMPNativeAffinity : public KMPAffinity {
++retval; ++retval;
return retval; return retval;
} }
#if KMP_OS_AIX
// On AIX, we don't have a way to get CPU(s) a thread is bound to.
// This routine is only used to get the full mask.
// AIX flavour of get_system_affinity(): fills this mask with every available
// processor instead of querying the calling thread's current binding.
// Always returns 0.
int get_system_affinity(bool abort_on_error) override {
  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
              "Illegal get affinity operation when not capable");
  // No system call is issued here, so there is no failure to abort on.
  (void)abort_on_error;
  // Mark CPUs 0 .. __kmp_xproc-1 as members of this mask.
  int cpu = 0;
  while (cpu < __kmp_xproc) {
    KMP_CPU_SET(cpu, this);
    ++cpu;
  }
  return 0;
}
// AIX flavour of set_system_affinity(): binds the calling thread to the CPUs
// named by this mask using bindprocessor().
//
// abort_on_error: when true, a failed bindprocessor() is reported through
//                 __kmp_fatal(); when false, the failure is skipped and the
//                 next CPU in the mask is tried.
// Returns 0 unless abort_on_error is set and a bind failed, in which case the
// errno captured after the failed bindprocessor() is the return value.
int set_system_affinity(bool abort_on_error) const override {
  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
              "Illegal set affinity operation when not capable");
  int location;
  int gtid = __kmp_entry_gtid();
  int tid = thread_self();
  // Unbind the thread if it was bound to any processors before so that
  // we can bind the thread to CPUs specified by the mask not others.
  int retval = bindprocessor(BINDTHREAD, tid, PROCESSOR_CLASS_ANY);
  // On AIX, we can only bind to one instead of a set of CPUs with the
  // bindprocessor() system call.
  KMP_CPU_SET_ITERATE(location, this) {
    if (KMP_CPU_ISSET(location, this)) {
      retval = bindprocessor(BINDTHREAD, tid, location);
      if (retval == -1 && errno == EPERM) {
        // The bind was refused; check whether the process is attached to a
        // non-default resource set, and if so detach it and retry.
        rsid_t rsid;
        // rs_alloc() gives ra_getrset() a handle to fill in (and keeps the
        // compiler from warning about uninitialized use).
        rsethandle_t rsh = rs_alloc(RS_EMPTY);
        rsid.at_pid = getpid();
        const bool has_custom_rset =
            RS_DEFAULT_RSET != ra_getrset(R_PROCESS, rsid, 0, rsh);
        // The handle is only needed for the query above; release it so the
        // retry path does not leak one resource set per call.
        rs_free(rsh);
        if (has_custom_rset) {
          retval = ra_detachrset(R_PROCESS, rsid, 0);
          retval = bindprocessor(BINDTHREAD, tid, location);
        }
      }
      if (retval == 0) {
        KA_TRACE(10, ("__kmp_set_system_affinity: Done binding "
                      "T#%d to cpu=%d.\n",
                      gtid, location));
        continue;
      }
      int error = errno;
      if (abort_on_error) {
        // Trace before reporting: __kmp_fatal() is not expected to return,
        // so a trace placed after it would never be emitted.
        KA_TRACE(10, ("__kmp_set_system_affinity: Error binding "
                      "T#%d to cpu=%d, errno=%d.\n",
                      gtid, location, error));
        __kmp_fatal(KMP_MSG(FunctionError, "bindprocessor()"),
                    KMP_ERR(error), __kmp_msg_null);
        return error;
      }
    }
  }
  return 0;
}
#else // !KMP_OS_AIX
int get_system_affinity(bool abort_on_error) override { int get_system_affinity(bool abort_on_error) override {
KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
"Illegal get affinity operation when not capable"); "Illegal get affinity operation when not capable");
@@ -443,6 +511,7 @@ class KMPNativeAffinity : public KMPAffinity {
} }
return error; return error;
} }
#endif // KMP_OS_AIX
}; };
void determine_capable(const char *env_var) override { void determine_capable(const char *env_var) override {
__kmp_affinity_determine_capable(env_var); __kmp_affinity_determine_capable(env_var);
@@ -471,7 +540,7 @@ class KMPNativeAffinity : public KMPAffinity {
} }
api_type get_api_type() const override { return NATIVE_OS; } api_type get_api_type() const override { return NATIVE_OS; }
}; };
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */ #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_AIX */
#if KMP_OS_WINDOWS #if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity { class KMPNativeAffinity : public KMPAffinity {
@@ -120,7 +120,8 @@ extern void __kmp_validate_locks(void);
struct kmp_base_tas_lock { struct kmp_base_tas_lock {
// KMP_LOCK_FREE(tas) => unlocked; locked: (gtid+1) of owning thread // KMP_LOCK_FREE(tas) => unlocked; locked: (gtid+1) of owning thread
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __LP64__ #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) && \
__LP64__
// Flip the ordering of the high and low 32-bit member to be consistent // Flip the ordering of the high and low 32-bit member to be consistent
// with the memory layout of the address in 64-bit big-endian. // with the memory layout of the address in 64-bit big-endian.
kmp_int32 depth_locked; // depth locked, for nested locks only kmp_int32 depth_locked; // depth locked, for nested locks only
@@ -75,7 +75,8 @@
#error Unknown compiler #error Unknown compiler
#endif #endif
#if (KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_FREEBSD) && !KMP_OS_WASI #if (KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_FREEBSD || KMP_OS_AIX) && \
!KMP_OS_WASI
#define KMP_AFFINITY_SUPPORTED 1 #define KMP_AFFINITY_SUPPORTED 1
#if KMP_OS_WINDOWS && KMP_ARCH_X86_64 #if KMP_OS_WINDOWS && KMP_ARCH_X86_64
#define KMP_GROUP_AFFINITY 1 #define KMP_GROUP_AFFINITY 1
@@ -116,7 +116,7 @@ static void __kmp_print_cond(char *buffer, kmp_cond_align_t *cond) {
} }
#endif #endif
#if ((KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED) #if ((KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_AIX) && KMP_AFFINITY_SUPPORTED)
/* Affinity support */ /* Affinity support */
@@ -132,6 +132,29 @@ void __kmp_affinity_bind_thread(int which) {
KMP_CPU_FREE_FROM_STACK(mask); KMP_CPU_FREE_FROM_STACK(mask);
} }
#if KMP_OS_AIX
// AIX flavour: affinity is enabled unconditionally. The mask size is the
// number of bytes needed to hold one bit per processor (__kmp_xproc), padded
// to a multiple of sizeof(__kmp_affin_mask_size).
void __kmp_affinity_determine_capable(const char *env_var) {
  // All versions of AIX support bindprocessor().
  // Whole bytes first, plus one more byte for any leftover bits.
  size_t num_bytes = __kmp_xproc / CHAR_BIT;
  num_bytes += (__kmp_xproc % CHAR_BIT) ? 1 : 0;

  // Pad up to the next multiple of the mask_size_type width.
  const size_t unit = sizeof(__kmp_affin_mask_size);
  const size_t excess = num_bytes % unit;
  if (excess != 0) {
    num_bytes += unit - excess;
  }

  KMP_AFFINITY_ENABLE(num_bytes);
  KA_TRACE(10,
           ("__kmp_affinity_determine_capable: "
            "AIX OS affinity interface bindprocessor functional (mask size = "
            "%" KMP_SIZE_T_SPEC ").\n",
            __kmp_affin_mask_size));
}
#else // !KMP_OS_AIX
/* Determine if we can access affinity functionality on this version of /* Determine if we can access affinity functionality on this version of
* Linux* OS by checking __NR_sched_{get,set}affinity system calls, and set * Linux* OS by checking __NR_sched_{get,set}affinity system calls, and set
* __kmp_affin_mask_size to the appropriate value (0 means not capable). */ * __kmp_affin_mask_size to the appropriate value (0 means not capable). */
@@ -259,8 +282,9 @@ void __kmp_affinity_determine_capable(const char *env_var) {
KMP_WARNING(AffCantGetMaskSize, env_var); KMP_WARNING(AffCantGetMaskSize, env_var);
} }
} }
#endif // KMP_OS_AIX
#endif // KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED #endif // (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_AIX) &&
// KMP_AFFINITY_SUPPORTED
#if KMP_USE_FUTEX #if KMP_USE_FUTEX
@@ -476,7 +500,7 @@ static void *__kmp_launch_worker(void *thr) {
#endif /* KMP_BLOCK_SIGNALS */ #endif /* KMP_BLOCK_SIGNALS */
void *exit_val; void *exit_val;
#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
KMP_OS_OPENBSD || KMP_OS_HURD || KMP_OS_SOLARIS KMP_OS_OPENBSD || KMP_OS_HURD || KMP_OS_SOLARIS || KMP_OS_AIX
void *volatile padding = 0; void *volatile padding = 0;
#endif #endif
int gtid; int gtid;
@@ -525,7 +549,7 @@ static void *__kmp_launch_worker(void *thr) {
#endif /* KMP_BLOCK_SIGNALS */ #endif /* KMP_BLOCK_SIGNALS */
#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
KMP_OS_OPENBSD || KMP_OS_HURD || KMP_OS_SOLARIS KMP_OS_OPENBSD || KMP_OS_HURD || KMP_OS_SOLARIS || KMP_OS_AIX
if (__kmp_stkoffset > 0 && gtid > 0) { if (__kmp_stkoffset > 0 && gtid > 0) {
padding = KMP_ALLOCA(gtid * __kmp_stkoffset); padding = KMP_ALLOCA(gtid * __kmp_stkoffset);
(void)padding; (void)padding;
@@ -1245,7 +1269,7 @@ static void __kmp_atfork_child(void) {
++__kmp_fork_count; ++__kmp_fork_count;
#if KMP_AFFINITY_SUPPORTED #if KMP_AFFINITY_SUPPORTED
#if KMP_OS_LINUX || KMP_OS_FREEBSD #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_AIX
// reset the affinity in the child to the initial thread // reset the affinity in the child to the initial thread
// affinity in the parent // affinity in the parent
kmp_set_thread_affinity_mask_initial(); kmp_set_thread_affinity_mask_initial();
@@ -2214,6 +2238,7 @@ int __kmp_is_address_mapped(void *addr) {
found = (int)addr < (__builtin_wasm_memory_size(0) * PAGESIZE); found = (int)addr < (__builtin_wasm_memory_size(0) * PAGESIZE);
#elif KMP_OS_DRAGONFLY || KMP_OS_SOLARIS || KMP_OS_AIX #elif KMP_OS_DRAGONFLY || KMP_OS_SOLARIS || KMP_OS_AIX
(void)rc;
// FIXME(DragonFly, Solaris, AIX): Implement this // FIXME(DragonFly, Solaris, AIX): Implement this
found = 1; found = 1;

Some files were not shown because too many files have changed in this diff Show More