From 687a64222b4c87c825258d4dfeb1f0794e8cb300 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Tue, 11 Jun 2019 18:16:27 +0000 Subject: [PATCH 1/7] Vendor import of llvm release_80 branch r363030: https://llvm.org/svn/llvm-project/llvm/branches/release_80@363030 --- CMakeLists.txt | 2 +- cmake/modules/AddLLVM.cmake | 1 + cmake/modules/LLVMProcessSources.cmake | 10 +- docs/ReleaseNotes.rst | 20 + lib/CodeGen/TargetRegisterInfo.cpp | 6 + lib/DebugInfo/DWARF/DWARFDebugFrame.cpp | 4 +- lib/MC/ELFObjectWriter.cpp | 1 + lib/MC/MCWin64EH.cpp | 2 +- lib/MC/WasmObjectWriter.cpp | 8 +- lib/Object/COFFImportFile.cpp | 2 +- lib/Target/AArch64/AArch64SchedExynosM4.td | 36 +- lib/Target/AArch64/AArch64SchedPredExynos.td | 11 - lib/Target/AArch64/AArch64SchedPredicates.td | 53 -- lib/Target/AMDGPU/SIFoldOperands.cpp | 73 +- lib/Target/AMDGPU/VOP2Instructions.td | 12 +- lib/Target/ARM/ARMISelLowering.cpp | 30 +- lib/Target/AVR/AVRISelLowering.cpp | 28 +- lib/Target/AVR/AVRISelLowering.h | 8 +- lib/Target/AVR/AVRSubtarget.cpp | 12 +- lib/Target/AVR/AVRSubtarget.h | 5 +- .../Mips/MCTargetDesc/MipsTargetStreamer.cpp | 7 +- lib/Target/Mips/MicroMips32r6InstrInfo.td | 4 +- lib/Target/Mips/MicroMipsInstrFPU.td | 5 + lib/Target/Mips/MipsAsmPrinter.cpp | 3 +- lib/Target/Mips/MipsDSPInstrInfo.td | 4 +- lib/Target/Mips/MipsDelaySlotFiller.cpp | 1 + lib/Target/Mips/MipsFastISel.cpp | 35 +- lib/Target/Mips/MipsSEInstrInfo.cpp | 3 + .../PowerPC/Disassembler/PPCDisassembler.cpp | 8 + .../PowerPC/InstPrinter/PPCInstPrinter.cpp | 7 +- .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 30 +- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 4 +- lib/Target/PowerPC/PPCInstrInfo.td | 2 + lib/Target/PowerPC/PPCSubtarget.cpp | 3 + lib/Target/Sparc/SparcRegisterInfo.cpp | 4 +- .../WebAssembly/WebAssemblyISelLowering.cpp | 9 +- lib/Target/X86/X86FastISel.cpp | 2 +- lib/Target/X86/X86TargetMachine.cpp | 6 +- test/CodeGen/AMDGPU/add.ll | 83 +- ...ds-negative-offset-addressing-mode-loop.ll | 6 +- test/CodeGen/AMDGPU/fence-barrier.ll | 3 +- .../CodeGen/AMDGPU/fold-fi-operand-shrink.mir | 230 +++++ .../AMDGPU/fold-immediate-operand-shrink.mir | 72 +- test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll | 3 +- test/CodeGen/AMDGPU/r600.add.ll | 167 ++++ test/CodeGen/AMDGPU/r600.sub.ll | 152 ++++ test/CodeGen/AMDGPU/salu-to-valu.ll | 2 +- test/CodeGen/AMDGPU/sub.ll | 90 +- test/CodeGen/ARM/tail-call-scheduling.ll | 35 + test/CodeGen/AVR/{mul.ll => hardware-mul.ll} | 2 + test/CodeGen/AVR/smul-with-overflow.ll | 2 +- test/CodeGen/AVR/software-mul.ll | 28 + test/CodeGen/AVR/umul-with-overflow.ll | 2 +- test/CodeGen/Mips/Fast-ISel/icmpbr1.ll | 3 +- test/CodeGen/Mips/Fast-ISel/pr40325.ll | 23 + test/CodeGen/Mips/abiflags32.ll | 8 + test/CodeGen/Mips/llvm-ir/fptosi.ll | 418 +++++++++ .../Mips/micromips-pseudo-mtlohi-expand.ll | 63 ++ test/CodeGen/Mips/pseudo-jump-fill.ll | 68 ++ test/CodeGen/PowerPC/ppc32-pic-large.ll | 4 + test/CodeGen/SPARC/fp128.ll | 23 + test/CodeGen/WebAssembly/varargs.ll | 26 + test/CodeGen/X86/PR40322.ll | 164 ++++ test/CodeGen/X86/fast-isel-nontemporal.ll | 72 +- test/CodeGen/X86/regalloc-copy-hints.mir | 805 ++++++++++++++++++ test/MC/PowerPC/ppc64-localentry-symbols.s | 34 + test/MC/WebAssembly/null-output.s | 10 + test/tools/llvm-dlltool/coff-weak-exports.def | 3 + .../tools/llvm-objdump/AMDGPU/source-lines.ll | 4 +- .../llvm-objdump/PowerPC/branch-offset.s | 43 + test/tools/llvm-objdump/PowerPC/lit.local.cfg | 2 + test/tools/llvm-objdump/eh_frame-coff.test | 4 +- .../llvm-objdump/elf-symbol-visibility.test | 37 + tools/llvm-objdump/llvm-objdump.cpp | 30 +- utils/git-svn/git-llvm | 6 +- utils/lit/lit/__init__.py | 2 +- utils/release/merge-request.sh | 3 + 77 files changed, 2834 insertions(+), 359 deletions(-) create mode 100644 test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir create mode 100644 test/CodeGen/AMDGPU/r600.add.ll create mode 100644 test/CodeGen/AMDGPU/r600.sub.ll create mode 100644 test/CodeGen/ARM/tail-call-scheduling.ll rename test/CodeGen/AVR/{mul.ll => hardware-mul.ll} (90%) create mode 100644 test/CodeGen/AVR/software-mul.ll create mode 100644 test/CodeGen/Mips/Fast-ISel/pr40325.ll create mode 100644 test/CodeGen/Mips/llvm-ir/fptosi.ll create mode 100644 test/CodeGen/Mips/micromips-pseudo-mtlohi-expand.ll create mode 100644 test/CodeGen/Mips/pseudo-jump-fill.ll create mode 100644 test/CodeGen/X86/PR40322.ll create mode 100644 test/CodeGen/X86/regalloc-copy-hints.mir create mode 100644 test/MC/PowerPC/ppc64-localentry-symbols.s create mode 100644 test/MC/WebAssembly/null-output.s create mode 100644 test/tools/llvm-objdump/PowerPC/branch-offset.s create mode 100644 test/tools/llvm-objdump/PowerPC/lit.local.cfg create mode 100644 test/tools/llvm-objdump/elf-symbol-visibility.test diff --git a/CMakeLists.txt b/CMakeLists.txt index 27754f33949..81c2bab39ec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 0) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 0) + set(LLVM_VERSION_PATCH 1) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX "") diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index 0df6845aaa7..1a417447278 100755 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -855,6 +855,7 @@ if(NOT LLVM_TOOLCHAIN_TOOLS) llvm-lib llvm-objdump llvm-rc + llvm-profdata ) endif() diff --git a/cmake/modules/LLVMProcessSources.cmake b/cmake/modules/LLVMProcessSources.cmake index 7cbd2863500..d0be0e8b3ba 100644 --- a/cmake/modules/LLVMProcessSources.cmake +++ b/cmake/modules/LLVMProcessSources.cmake @@ -30,7 +30,15 @@ endmacro(add_td_sources) function(add_header_files_for_glob hdrs_out glob) file(GLOB hds ${glob}) - set(${hdrs_out} ${hds} PARENT_SCOPE) + set(filtered) + foreach(file ${hds}) + # Explicit existence check is necessary to filter dangling symlinks + # out. See https://bugs.gentoo.org/674662. + if(EXISTS ${file}) + list(APPEND filtered ${file}) + endif() + endforeach() + set(${hdrs_out} ${filtered} PARENT_SCOPE) endfunction(add_header_files_for_glob) function(find_all_header_files hdrs_out additional_headerdirs) diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index 8209c08889e..16121585d24 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -110,6 +110,26 @@ updated to use LLJIT. MCJIT and ExecutionEngine continue to be supported, though ORC should be preferred for new projects. +Changes to the C++ APIs +----------------------- + +Three of the IR library methods related to debugging information for +functions and methods have changed their prototypes: + + DIBuilder::createMethod + DIBuilder::createFunction + DIBuilder::createTempFunctionFwdDecl + +In all cases, several individual parameters were removed, and replaced +by a single 'SPFlags' (subprogram flags) parameter. The individual +parameters are: 'isLocalToUnit'; 'isDefinition'; 'isOptimized'; and +for 'createMethod', 'Virtuality'. The new 'SPFlags' parameter has a +default value equivalent to passing 'false' for the three 'bool' +parameters, and zero (non-virtual) to the 'Virtuality' parameter. For +any old-style API call that passed 'true' or a non-zero virtuality to +these methods, you will need to substitute the correct 'SPFlags' value. +The helper method 'DISubprogram::toSPFlags()' might be useful in making +this conversion. Changes to the AArch64 Target ----------------------------- diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index 661dc18f7a8..d3059280a60 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -398,6 +399,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, const std::pair> &Hints_MRI = MRI.getRegAllocationHints(VirtReg); + SmallSet HintedRegs; // First hint may be a target hint. bool Skip = (Hints_MRI.first != 0); for (auto Reg : Hints_MRI.second) { @@ -411,6 +413,10 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, if (VRM && isVirtualRegister(Phys)) Phys = VRM->getPhys(Phys); + // Don't add the same reg twice (Hints_MRI may contain multiple virtual + // registers allocated to the same physreg). + if (!HintedRegs.insert(Phys).second) + continue; // Check that Phys is a valid hint in VirtReg's register class. if (!isPhysicalRegister(Phys)) continue; diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index ba55ffc2817..8a88a2fa3a0 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -301,7 +301,7 @@ void CIE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const { OS << format(" Data alignment factor: %d\n", (int32_t)DataAlignmentFactor); OS << format(" Return address column: %d\n", (int32_t)ReturnAddressRegister); if (Personality) - OS << format(" Personality Address: %08x\n", *Personality); + OS << format(" Personality Address: %016" PRIx64 "\n", *Personality); if (!AugmentationData.empty()) { OS << " Augmentation data: "; for (uint8_t Byte : AugmentationData) @@ -320,7 +320,7 @@ void FDE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const { (uint32_t)InitialLocation, (uint32_t)InitialLocation + (uint32_t)AddressRange); if (LSDAAddress) - OS << format(" LSDA Address: %08x\n", *LSDAAddress); + OS << format(" LSDA Address: %016" PRIx64 "\n", *LSDAAddress); CFIs.dump(OS, MRI, IsEH); OS << "\n"; } diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index ade858113a3..1b505776ca1 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -1271,6 +1271,7 @@ void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm, // This is the first place we are able to copy this information. Alias->setExternal(Symbol.isExternal()); Alias->setBinding(Symbol.getBinding()); + Alias->setOther(Symbol.getOther()); if (!Symbol.isUndefined() && !Rest.startswith("@@@")) continue; diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp index 8bc1f08c887..3ef1514455a 100644 --- a/lib/MC/MCWin64EH.cpp +++ b/lib/MC/MCWin64EH.cpp @@ -522,7 +522,7 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) { if (MatchingEpilog) { assert(EpilogInfo.find(MatchingEpilog) != EpilogInfo.end() && "Duplicate epilog not found"); - EpilogInfo[EpilogStart] = EpilogInfo[MatchingEpilog]; + EpilogInfo[EpilogStart] = EpilogInfo.lookup(MatchingEpilog); // Clear the unwind codes in the EpilogMap, so that they don't get output // in the logic below. EpilogInstrs.clear(); diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp index 333748db919..b07fe05cad5 100644 --- a/lib/MC/WasmObjectWriter.cpp +++ b/lib/MC/WasmObjectWriter.cpp @@ -368,7 +368,13 @@ void WasmObjectWriter::startCustomSection(SectionBookkeeping &Section, // Now that the section is complete and we know how big it is, patch up the // section size field at the start of the section. void WasmObjectWriter::endSection(SectionBookkeeping &Section) { - uint64_t Size = W.OS.tell() - Section.PayloadOffset; + uint64_t Size = W.OS.tell(); + // /dev/null doesn't support seek/tell and can report offset of 0. + // Simply skip this patching in that case. + if (!Size) + return; + + Size -= Section.PayloadOffset; if (uint32_t(Size) != Size) report_fatal_error("section size does not fit in a uint32_t"); diff --git a/lib/Object/COFFImportFile.cpp b/lib/Object/COFFImportFile.cpp index dc11cc4bcff..e7c7efe4367 100644 --- a/lib/Object/COFFImportFile.cpp +++ b/lib/Object/COFFImportFile.cpp @@ -496,7 +496,7 @@ NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym, // COFF Header coff_file_header Header{ - u16(0), + u16(Machine), u16(NumberOfSections), u32(0), u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section))), diff --git a/lib/Target/AArch64/AArch64SchedExynosM4.td b/lib/Target/AArch64/AArch64SchedExynosM4.td index 4d892465b3f..61652b1d8e3 100644 --- a/lib/Target/AArch64/AArch64SchedExynosM4.td +++ b/lib/Target/AArch64/AArch64SchedExynosM4.td @@ -239,7 +239,6 @@ def M4WriteNEONK : SchedWriteRes<[M4UnitNSHF, M4UnitS0]> { let Latency = 5; let NumMicroOps = 2; } def M4WriteNEONL : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; } -def M4WriteNEONM : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; } def M4WriteNEONN : SchedWriteRes<[M4UnitNMSC, M4UnitNMSC]> { let Latency = 5; let NumMicroOps = 2; } @@ -480,8 +479,6 @@ def M4WriteCOPY : SchedWriteVariant<[SchedVar, SchedVar]>; def M4WriteMOVI : SchedWriteVariant<[SchedVar, SchedVar]>; -def M4WriteMULL : SchedWriteVariant<[SchedVar, - SchedVar]>; // Fast forwarding. def M4ReadAESM1 : SchedReadAdvance<+1, [M4WriteNCRY1]>; @@ -489,7 +486,8 @@ def M4ReadFMACM1 : SchedReadAdvance<+1, [M4WriteFMAC4, M4WriteFMAC4H, M4WriteFMAC5]>; def M4ReadNMULM1 : SchedReadAdvance<+1, [M4WriteNMUL3]>; -def M4ReadMULLP2 : SchedReadAdvance<-2, [M4WriteNEONM]>; +def M4ReadNMULP2 : SchedReadAdvance<-2, [M4WriteNMUL3]>; + //===----------------------------------------------------------------------===// // Coarse scheduling model. @@ -662,10 +660,8 @@ def : InstRW<[M4WriteNEONK], (instregex "^FMOVDXHighr")>; def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev1f16")>; def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev1i(32|64)")>; def : InstRW<[M4WriteNMSC1], (instregex "^FRECPXv1")>; -def : InstRW<[M4WriteFMAC4H, - M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S16")>; -def : InstRW<[M4WriteFMAC4, - M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S(32|64)")>; +def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)S16")>; +def : InstRW<[M4WriteFMAC4], (instregex "^F(RECP|RSQRT)S(32|64)")>; // FP load instructions. def : InstRW<[WriteVLD], (instregex "^LDR[SDQ]l")>; @@ -736,14 +732,20 @@ def : InstRW<[M4WriteNALU1], (instregex "^(AND|BIC|EOR|NOT|ORN|ORR)v")>; def : InstRW<[M4WriteNMSC1], (instregex "^[SU](MIN|MAX)v")>; def : InstRW<[M4WriteNMSC2], (instregex "^[SU](MIN|MAX)Pv")>; def : InstRW<[M4WriteNHAD3], (instregex "^[SU](MIN|MAX)Vv")>; -def : InstRW<[M4WriteNMUL3], (instregex "^(SQR?D)?MULH?v")>; def : InstRW<[M4WriteNMUL3, M4ReadNMULM1], (instregex "^ML[AS]v")>; -def : InstRW<[M4WriteNMUL3], (instregex "^SQRDML[AS]H")>; -def : InstRW<[M4WriteMULL, - M4ReadMULLP2], (instregex "^(S|U|SQD)ML[AS]Lv")>; -def : InstRW<[M4WriteMULL, - M4ReadMULLP2], (instregex "^(S|U|SQD)MULLv")>; +def : InstRW<[M4WriteNMUL3, + M4ReadNMULM1], (instregex "^(SQR?D)?MULH?v")>; +def : InstRW<[M4WriteNMUL3, + M4ReadNMULM1], (instregex "^SQRDML[AS]H")>; +def : InstRW<[M4WriteNMUL3, + M4ReadNMULM1], (instregex "^(S|U|SQD)ML[AS]L(v1(i32|i64)|v2i32|v4i16|v8i8)")>; +def : InstRW<[M4WriteNMUL3, + M4ReadNMULP2], (instregex "^(S|U|SQD)ML[AS]L(v4i32|v8i16|v16i8)")>; +def : InstRW<[M4WriteNMUL3, + M4ReadNMULM1], (instregex "^(S|U|SQD)MULL(v1(i32|i64)|v2i32|v4i16|v8i8)")>; +def : InstRW<[M4WriteNMUL3, + M4ReadNMULP2], (instregex "^(S|U|SQD)MULL(v4i32|v8i16|v16i8)")>; def : InstRW<[M4WriteNMUL3], (instregex "^[SU]DOT(lane)?v")>; def : InstRW<[M4WriteNHAD3], (instregex "^[SU]ADALPv")>; def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]R?SRA[dv]")>; @@ -808,10 +810,8 @@ def : InstRW<[M4WriteNALU1], (instregex "^FMOVv.f(32|64)")>; def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev[248]f16")>; def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev[248]f(32|64)")>; def : InstRW<[M4WriteFCVT3], (instregex "^U(RECP|RSQRT)Ev[24]i32")>; -def : InstRW<[M4WriteFMAC4H, - M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f16")>; -def : InstRW<[M4WriteFMAC4, - M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>; +def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)Sv.f16")>; +def : InstRW<[M4WriteFMAC4], (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>; def : InstRW<[M4WriteNSHF1], (instregex "^REV(16|32|64)v")>; def : InstRW<[M4WriteNSHFA], (instregex "^TB[LX]v(8|16)i8One")>; def : InstRW<[M4WriteNSHFB], (instregex "^TB[LX]v(8|16)i8Two")>; diff --git a/lib/Target/AArch64/AArch64SchedPredExynos.td b/lib/Target/AArch64/AArch64SchedPredExynos.td index 48c54230e9d..316036d8940 100644 --- a/lib/Target/AArch64/AArch64SchedPredExynos.td +++ b/lib/Target/AArch64/AArch64SchedPredExynos.td @@ -103,17 +103,6 @@ def ExynosScaledIdxPred : MCSchedPredicate; // Identify FP instructions. def ExynosFPPred : MCSchedPredicate>; -// Identify whether an instruction whose result is a long vector -// operates on the upper half of the input registers. -def ExynosLongVectorUpperFn : TIIPredicate< - "isExynosLongVectorUpper", - MCOpcodeSwitchStatement< - [MCOpcodeSwitchCase< - IsLongVectorUpperOp.ValidOpcodes, - MCReturnStatement>], - MCReturnStatement>>; -def ExynosLongVectorUpperPred : MCSchedPredicate; - // Identify 128-bit NEON instructions. def ExynosQFormPred : MCSchedPredicate; diff --git a/lib/Target/AArch64/AArch64SchedPredicates.td b/lib/Target/AArch64/AArch64SchedPredicates.td index dbaf11fc95d..b23572b41b9 100644 --- a/lib/Target/AArch64/AArch64SchedPredicates.td +++ b/lib/Target/AArch64/AArch64SchedPredicates.td @@ -268,59 +268,6 @@ def IsStoreRegOffsetOp : CheckOpcode<[STRBBroW, STRBBroX, def IsLoadStoreRegOffsetOp : CheckOpcode; -// Identify whether an instruction whose result is a long vector -// operates on the upper half of the input registers. -def IsLongVectorUpperOp : CheckOpcode<[FCVTLv8i16, FCVTLv4i32, - FCVTNv8i16, FCVTNv4i32, - FCVTXNv4f32, - PMULLv16i8, PMULLv2i64, - RADDHNv8i16_v16i8, RADDHNv4i32_v8i16, RADDHNv2i64_v4i32, - RSHRNv16i8_shift, RSHRNv8i16_shift, RSHRNv4i32_shift, - RSUBHNv8i16_v16i8, RSUBHNv4i32_v8i16, RSUBHNv2i64_v4i32, - SABALv16i8_v8i16, SABALv8i16_v4i32, SABALv4i32_v2i64, - SABDLv16i8_v8i16, SABDLv8i16_v4i32, SABDLv4i32_v2i64, - SADDLv16i8_v8i16, SADDLv8i16_v4i32, SADDLv4i32_v2i64, - SADDWv16i8_v8i16, SADDWv8i16_v4i32, SADDWv4i32_v2i64, - SHLLv16i8, SHLLv8i16, SHLLv4i32, - SHRNv16i8_shift, SHRNv8i16_shift, SHRNv4i32_shift, - SMLALv16i8_v8i16, SMLALv8i16_v4i32, SMLALv4i32_v2i64, - SMLALv8i16_indexed, SMLALv4i32_indexed, - SMLSLv16i8_v8i16, SMLSLv8i16_v4i32, SMLSLv4i32_v2i64, - SMLSLv8i16_indexed, SMLSLv4i32_indexed, - SMULLv16i8_v8i16, SMULLv8i16_v4i32, SMULLv4i32_v2i64, - SMULLv8i16_indexed, SMULLv4i32_indexed, - SQDMLALv8i16_v4i32, SQDMLALv4i32_v2i64, - SQDMLALv8i16_indexed, SQDMLALv4i32_indexed, - SQDMLSLv8i16_v4i32, SQDMLSLv4i32_v2i64, - SQDMLSLv8i16_indexed, SQDMLSLv4i32_indexed, - SQDMULLv8i16_v4i32, SQDMULLv4i32_v2i64, - SQDMULLv8i16_indexed, SQDMULLv4i32_indexed, - SQRSHRNv16i8_shift, SQRSHRNv8i16_shift, SQRSHRNv4i32_shift, - SQRSHRUNv16i8_shift, SQRSHRUNv8i16_shift, SQRSHRUNv4i32_shift, - SQSHRNv16i8_shift, SQSHRNv8i16_shift, SQSHRNv4i32_shift, - SQSHRUNv16i8_shift, SQSHRUNv8i16_shift, SQSHRUNv4i32_shift, - SQXTNv16i8, SQXTNv8i16, SQXTNv4i32, - SQXTUNv16i8, SQXTUNv8i16, SQXTUNv4i32, - SSHLLv16i8_shift, SSHLLv8i16_shift, SSHLLv4i32_shift, - SSUBLv16i8_v8i16, SSUBLv8i16_v4i32, SSUBLv4i32_v2i64, - SSUBWv16i8_v8i16, SSUBWv8i16_v4i32, SSUBWv4i32_v2i64, - UABALv16i8_v8i16, UABALv8i16_v4i32, UABALv4i32_v2i64, - UABDLv16i8_v8i16, UABDLv8i16_v4i32, UABDLv4i32_v2i64, - UADDLv16i8_v8i16, UADDLv8i16_v4i32, UADDLv4i32_v2i64, - UADDWv16i8_v8i16, UADDWv8i16_v4i32, UADDWv4i32_v2i64, - UMLALv16i8_v8i16, UMLALv8i16_v4i32, UMLALv4i32_v2i64, - UMLALv8i16_indexed, UMLALv4i32_indexed, - UMLSLv16i8_v8i16, UMLSLv8i16_v4i32, UMLSLv4i32_v2i64, - UMLSLv8i16_indexed, UMLSLv4i32_indexed, - UMULLv16i8_v8i16, UMULLv8i16_v4i32, UMULLv4i32_v2i64, - UMULLv8i16_indexed, UMULLv4i32_indexed, - UQSHRNv16i8_shift, UQSHRNv8i16_shift, UQSHRNv4i32_shift, - UQXTNv16i8, UQXTNv8i16, UQXTNv4i32, - USHLLv16i8_shift, USHLLv8i16_shift, USHLLv4i32_shift, - USUBLv16i8_v8i16, USUBLv8i16_v4i32, USUBLv4i32_v2i64, - USUBWv16i8_v8i16, USUBWv8i16_v4i32, USUBWv4i32_v2i64, - XTNv16i8, XTNv8i16, XTNv4i32]>; - // Target predicates. // Identify an instruction that effectively transfers a register to another. diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index f4e86695836..d679abd107d 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -201,49 +201,55 @@ static bool updateOperand(FoldCandidate &Fold, Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1); } } + } - if (Fold.needsShrink()) { - MachineBasicBlock *MBB = MI->getParent(); - auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI); - if (Liveness != MachineBasicBlock::LQR_Dead) - return false; + if ((Fold.isImm() || Fold.isFI()) && Fold.needsShrink()) { + MachineBasicBlock *MBB = MI->getParent(); + auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI); + if (Liveness != MachineBasicBlock::LQR_Dead) + return false; - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - int Op32 = Fold.getShrinkOpcode(); - MachineOperand &Dst0 = MI->getOperand(0); - MachineOperand &Dst1 = MI->getOperand(1); - assert(Dst0.isDef() && Dst1.isDef()); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + int Op32 = Fold.getShrinkOpcode(); + MachineOperand &Dst0 = MI->getOperand(0); + MachineOperand &Dst1 = MI->getOperand(1); + assert(Dst0.isDef() && Dst1.isDef()); - bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg()); + bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg()); - const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg()); - unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC); - const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg()); - unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC); + const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg()); + unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC); - MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32); + MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32); - if (HaveNonDbgCarryUse) { - BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg()) - .addReg(AMDGPU::VCC, RegState::Kill); - } - - // Keep the old instruction around to avoid breaking iterators, but - // replace the outputs with dummy registers. - Dst0.setReg(NewReg0); - Dst1.setReg(NewReg1); - - if (Fold.isCommuted()) - TII.commuteInstruction(*Inst32, false); - return true; + if (HaveNonDbgCarryUse) { + BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg()) + .addReg(AMDGPU::VCC, RegState::Kill); } - Old.ChangeToImmediate(Fold.ImmToFold); + // Keep the old instruction around to avoid breaking iterators, but + // replace it with a dummy instruction to remove uses. + // + // FIXME: We should not invert how this pass looks at operands to avoid + // this. Should track set of foldable movs instead of looking for uses + // when looking at a use. + Dst0.setReg(NewReg0); + for (unsigned I = MI->getNumOperands() - 1; I > 0; --I) + MI->RemoveOperand(I); + MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF)); + + if (Fold.isCommuted()) + TII.commuteInstruction(*Inst32, false); return true; } assert(!Fold.needsShrink() && "not handled"); + if (Fold.isImm()) { + Old.ChangeToImmediate(Fold.ImmToFold); + return true; + } + if (Fold.isFI()) { Old.ChangeToFrameIndex(Fold.FrameIndexToFold); return true; @@ -344,7 +350,7 @@ static bool tryAddToFoldList(SmallVectorImpl &FoldList, if ((Opc == AMDGPU::V_ADD_I32_e64 || Opc == AMDGPU::V_SUB_I32_e64 || Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME - OpToFold->isImm()) { + (OpToFold->isImm() || OpToFold->isFI())) { MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); // Verify the other operand is a VGPR, otherwise we would violate the @@ -357,7 +363,10 @@ static bool tryAddToFoldList(SmallVectorImpl &FoldList, assert(MI->getOperand(1).isDef()); - int Op32 = AMDGPU::getVOPe32(Opc); + // Make sure to get the 32-bit version of the commuted opcode. + unsigned MaybeCommutedOpc = MI->getOpcode(); + int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc); + FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true, Op32)); return true; diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td index e3fd7b5f9fa..8cf524a5128 100644 --- a/lib/Target/AMDGPU/VOP2Instructions.td +++ b/lib/Target/AMDGPU/VOP2Instructions.td @@ -515,18 +515,12 @@ let AddedComplexity = 1 in { } let SubtargetPredicate = HasAddNoCarryInsts in { - def : DivergentBinOp; - def : DivergentBinOp; - def : DivergentBinOp; + def : DivergentBinOp; + def : DivergentBinOp; } - -def : DivergentBinOp; - def : DivergentBinOp; -def : DivergentBinOp; - -def : DivergentBinOp; +def : DivergentBinOp; def : DivergentBinOp; def : DivergentBinOp; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 21de0f6a763..7e90edbbdab 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1984,32 +1984,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; - // Tail call byval lowering might overwrite argument registers so in case of - // tail call optimization the copies to registers are lowered later. - if (!isTailCall) - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - - // For tail calls lower the arguments to the 'real' stack slot. - if (isTailCall) { - // Force all the incoming stack arguments to be loaded from the stack - // before any new outgoing arguments are stored to the stack, because the - // outgoing stack slots may alias the incoming argument stack slots, and - // the alias isn't otherwise explicit. This is slightly more conservative - // than necessary, because it means that each store effectively depends - // on every argument instead of just those arguments it would clobber. - - // Do not flag preceding copytoreg stuff together with the following stuff. - InFlag = SDValue(); - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - InFlag = SDValue(); + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); } // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp index 57fc978b54b..5db75778232 100644 --- a/lib/Target/AVR/AVRISelLowering.cpp +++ b/lib/Target/AVR/AVRISelLowering.cpp @@ -26,19 +26,21 @@ #include "AVR.h" #include "AVRMachineFunctionInfo.h" +#include "AVRSubtarget.h" #include "AVRTargetMachine.h" #include "MCTargetDesc/AVRMCTargetDesc.h" namespace llvm { -AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm) - : TargetLowering(tm) { +AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM, + const AVRSubtarget &STI) + : TargetLowering(TM), Subtarget(STI) { // Set up the register classes. addRegisterClass(MVT::i8, &AVR::GPR8RegClass); addRegisterClass(MVT::i16, &AVR::DREGSRegClass); // Compute derived properties from the register classes. - computeRegisterProperties(tm.getSubtargetImpl()->getRegisterInfo()); + computeRegisterProperties(Subtarget.getRegisterInfo()); setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); @@ -163,6 +165,13 @@ AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm) setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand); + // Expand multiplications to libcalls when there is + // no hardware MUL. + if (!Subtarget.supportsMultiplication()) { + setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); + } + for (MVT VT : MVT::integer_valuetypes()) { setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); @@ -1271,7 +1280,7 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Add a register mask operand representing the call-preserved registers. const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine(); - const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv); assert(Mask && "Missing call preserved mask for calling convention"); @@ -1434,7 +1443,7 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI, MachineFunction *F = BB->getParent(); MachineRegisterInfo &RI = F->getRegInfo(); const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine(); - const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo(); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); switch (MI.getOpcode()) { @@ -1575,7 +1584,7 @@ static bool isCopyMulResult(MachineBasicBlock::iterator const &I) { MachineBasicBlock *AVRTargetLowering::insertMul(MachineInstr &MI, MachineBasicBlock *BB) const { const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine(); - const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo(); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); MachineBasicBlock::iterator I(MI); ++I; // in any case insert *after* the mul instruction if (isCopyMulResult(I)) @@ -1838,9 +1847,6 @@ std::pair AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { - auto STI = static_cast(this->getTargetMachine()) - .getSubtargetImpl(); - // We only support i8 and i16. // //:FIXME: remove this assert for now since it gets sometimes executed @@ -1884,8 +1890,8 @@ AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, } } - return TargetLowering::getRegForInlineAsmConstraint(STI->getRegisterInfo(), - Constraint, VT); + return TargetLowering::getRegForInlineAsmConstraint( + Subtarget.getRegisterInfo(), Constraint, VT); } void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op, diff --git a/lib/Target/AVR/AVRISelLowering.h b/lib/Target/AVR/AVRISelLowering.h index c90c65c81f7..7d77dd8fb01 100644 --- a/lib/Target/AVR/AVRISelLowering.h +++ b/lib/Target/AVR/AVRISelLowering.h @@ -64,12 +64,14 @@ enum NodeType { } // end of namespace AVRISD +class AVRSubtarget; class AVRTargetMachine; /// Performs target lowering for the AVR. class AVRTargetLowering : public TargetLowering { public: - explicit AVRTargetLowering(AVRTargetMachine &TM); + explicit AVRTargetLowering(const AVRTargetMachine &TM, + const AVRSubtarget &STI); public: MVT getScalarShiftAmountTy(const DataLayout &, EVT LHSTy) const override { @@ -164,6 +166,10 @@ class AVRTargetLowering : public TargetLowering { const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; +protected: + + const AVRSubtarget &Subtarget; + private: MachineBasicBlock *insertShift(MachineInstr &MI, MachineBasicBlock *BB) const; MachineBasicBlock *insertMul(MachineInstr &MI, MachineBasicBlock *BB) const; diff --git a/lib/Target/AVR/AVRSubtarget.cpp b/lib/Target/AVR/AVRSubtarget.cpp index 556d69ec523..c7c566270f4 100644 --- a/lib/Target/AVR/AVRSubtarget.cpp +++ b/lib/Target/AVR/AVRSubtarget.cpp @@ -29,9 +29,9 @@ namespace llvm { AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU, - const std::string &FS, AVRTargetMachine &TM) + const std::string &FS, const AVRTargetMachine &TM) : AVRGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(), - TLInfo(TM), TSInfo(), + TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)), TSInfo(), // Subtarget features m_hasSRAM(false), m_hasJMPCALL(false), m_hasIJMPCALL(false), @@ -44,4 +44,12 @@ AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU, ParseSubtargetFeatures(CPU, FS); } +AVRSubtarget & +AVRSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS, + const TargetMachine &TM) { + // Parse features string. + ParseSubtargetFeatures(CPU, FS); + return *this; +} + } // end of namespace llvm diff --git a/lib/Target/AVR/AVRSubtarget.h b/lib/Target/AVR/AVRSubtarget.h index fa26738da19..ba036d5e406 100644 --- a/lib/Target/AVR/AVRSubtarget.h +++ b/lib/Target/AVR/AVRSubtarget.h @@ -37,7 +37,7 @@ class AVRSubtarget : public AVRGenSubtargetInfo { //! \param FS The feature string. //! \param TM The target machine. AVRSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, - AVRTargetMachine &TM); + const AVRTargetMachine &TM); const AVRInstrInfo *getInstrInfo() const override { return &InstrInfo; } const TargetFrameLowering *getFrameLowering() const override { return &FrameLowering; } @@ -49,6 +49,9 @@ class AVRSubtarget : public AVRGenSubtargetInfo { /// \note Definition of function is auto generated by `tblgen`. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + AVRSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS, + const TargetMachine &TM); + // Subtarget feature getters. // See AVR.td for details. bool hasSRAM() const { return m_hasSRAM; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 58f9717e1cc..a46f84bd1c9 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -700,8 +700,11 @@ void MipsTargetAsmStreamer::emitDirectiveCpreturn(unsigned SaveLocation, } void MipsTargetAsmStreamer::emitDirectiveModuleFP() { - OS << "\t.module\tfp="; - OS << ABIFlagsSection.getFpABIString(ABIFlagsSection.getFpABI()) << "\n"; + MipsABIFlagsSection::FpABIKind FpABI = ABIFlagsSection.getFpABI(); + if (FpABI == MipsABIFlagsSection::FpABIKind::SOFT) + OS << "\t.module\tsoftfloat\n"; + else + OS << "\t.module\tfp=" << ABIFlagsSection.getFpABIString(FpABI) << "\n"; } void MipsTargetAsmStreamer::emitDirectiveSetFp( diff --git a/lib/Target/Mips/MicroMips32r6InstrInfo.td b/lib/Target/Mips/MicroMips32r6InstrInfo.td index c441aa76ad4..994a8882f94 100644 --- a/lib/Target/Mips/MicroMips32r6InstrInfo.td +++ b/lib/Target/Mips/MicroMips32r6InstrInfo.td @@ -1040,7 +1040,7 @@ class TRUNC_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.l.d", FGR64Opnd, class TRUNC_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>; class TRUNC_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.d", FGR32Opnd, - AFGR64Opnd, II_TRUNC>; + FGR64Opnd, II_TRUNC>; class SQRT_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S, fsqrt>; class SQRT_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.d", AFGR64Opnd, AFGR64Opnd, @@ -1750,6 +1750,8 @@ def : MipsPat<(f32 fpimm0), (MTC1_MMR6 ZERO)>, ISA_MICROMIPS32R6; def : MipsPat<(f32 fpimm0neg), (FNEG_S_MMR6 (MTC1_MMR6 ZERO))>, ISA_MICROMIPS32R6; def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src), (TRUNC_W_D_MMR6 FGR64Opnd:$src)>, ISA_MICROMIPS32R6; +def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src), + (TRUNC_W_S_MMR6 FGR32Opnd:$src)>, ISA_MICROMIPS32R6; def : MipsPat<(and GPRMM16:$src, immZExtAndi16:$imm), (ANDI16_MMR6 GPRMM16:$src, immZExtAndi16:$imm)>, diff --git a/lib/Target/Mips/MicroMipsInstrFPU.td b/lib/Target/Mips/MicroMipsInstrFPU.td index 1731afc1961..9e76165e7ad 100644 --- a/lib/Target/Mips/MicroMipsInstrFPU.td +++ b/lib/Target/Mips/MicroMipsInstrFPU.td @@ -425,6 +425,11 @@ def : MipsPat<(f64 (fpextend FGR32Opnd:$src)), def : MipsPat<(MipsTruncIntFP AFGR64Opnd:$src), (TRUNC_W_MM AFGR64Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; +def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src), + (CVT_W_D64_MM FGR64Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6, + FGR_64; +def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src), + (TRUNC_W_S_MM FGR32Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6; // Selects defm : MovzPats0, diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index a7a748b0840..c35f5beb688 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -813,7 +813,8 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { // We should always emit a '.module fp=...' but binutils 2.24 does not accept // it. We therefore emit it when it contradicts the ABI defaults (-mfpxx or // -mfp64) and omit it otherwise. - if (ABI.IsO32() && (STI.isABI_FPXX() || STI.isFP64bit())) + if ((ABI.IsO32() && (STI.isABI_FPXX() || STI.isFP64bit())) || + STI.useSoftFloat()) TS.emitDirectiveModuleFP(); // We should always emit a '.module [no]oddspreg' but binutils 2.24 does not diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index b9824220b55..a4078026e4f 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -1314,7 +1314,9 @@ def PseudoCMPU_LE_QB : PseudoCMP; def PseudoPICK_PH : PseudoPICK; def PseudoPICK_QB : PseudoPICK; -def PseudoMTLOHI_DSP : PseudoMTLOHI; +let AdditionalPredicates = [HasDSP] in { + def PseudoMTLOHI_DSP : PseudoMTLOHI; +} // Patterns. class DSPPat : diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index e3823e0dfdb..61e77fbeea6 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -726,6 +726,7 @@ bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin, // but we don't have enough information to make that decision. if (InMicroMipsMode && TII->getInstSizeInBytes(*CurrI) == 2 && (Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch || + Opcode == Mips::PseudoIndirectBranch_MM || Opcode == Mips::PseudoReturn || Opcode == Mips::TAILCALL)) continue; // Instructions LWP/SWP and MOVEP should not be in a delay slot as that diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index a18416b9e86..168750b2cba 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -954,21 +954,34 @@ bool MipsFastISel::selectBranch(const Instruction *I) { // MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; - // For now, just try the simplest case where it's fed by a compare. + + // Fold the common case of a conditional branch with a comparison + // in the same block. + unsigned ZExtCondReg = 0; if (const CmpInst *CI = dyn_cast(BI->getCondition())) { - MVT CIMVT = - TLI.getValueType(DL, CI->getOperand(0)->getType(), true).getSimpleVT(); - if (CIMVT == MVT::i1) + if (CI->hasOneUse() && CI->getParent() == I->getParent()) { + ZExtCondReg = createResultReg(&Mips::GPR32RegClass); + if (!emitCmp(ZExtCondReg, CI)) + return false; + } + } + + // For the general case, we need to mask with 1. + if (ZExtCondReg == 0) { + unsigned CondReg = getRegForValue(BI->getCondition()); + if (CondReg == 0) return false; - unsigned CondReg = getRegForValue(CI); - BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ)) - .addReg(CondReg) - .addMBB(TBB); - finishCondBranch(BI->getParent(), TBB, FBB); - return true; + ZExtCondReg = emitIntExt(MVT::i1, CondReg, MVT::i32, true); + if (ZExtCondReg == 0) + return false; } - return false; + + BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ)) + .addReg(ZExtCondReg) + .addMBB(TBB); + finishCondBranch(BI->getParent(), TBB, FBB); + return true; } bool MipsFastISel::selectCmp(const Instruction *I) { diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index c7ab90ed2a3..2b26caaa9f4 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -447,6 +447,9 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case Mips::PseudoMTLOHI_DSP: expandPseudoMTLoHi(MBB, MI, Mips::MTLO_DSP, Mips::MTHI_DSP, true); break; + case Mips::PseudoMTLOHI_MM: + expandPseudoMTLoHi(MBB, MI, Mips::MTLO_MM, Mips::MTHI_MM, false); + break; case Mips::PseudoCVT_S_W: expandCvtFPInt(MBB, MI, Mips::CVT_S_W, Mips::MTC1, false); break; diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 26869f25082..cce239cac97 100644 --- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -61,6 +61,14 @@ extern "C" void LLVMInitializePowerPCDisassembler() { createPPCLEDisassembler); } +static DecodeStatus DecodePCRel24BranchTarget(MCInst &Inst, unsigned Imm, + uint64_t Addr, + const void *Decoder) { + int32_t Offset = SignExtend32<24>(Imm); + Inst.addOperand(MCOperand::createImm(Offset)); + return MCDisassembler::Success; +} + // FIXME: These can be generated by TableGen from the existing register // encoding values! diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index fc29e4effbb..6824168b890 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -382,8 +382,11 @@ void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo, // Branches can take an immediate operand. This is used by the branch // selection pass to print .+8, an eight byte displacement from the PC. - O << ".+"; - printAbsBranchOperand(MI, OpNo, O); + O << "."; + int32_t Imm = SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2); + if (Imm >= 0) + O << "+"; + O << Imm; } void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index a1e4e07b25a..78609ef3d4e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -15,6 +15,7 @@ #include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCMCAsmInfo.h" #include "PPCTargetStreamer.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/BinaryFormat/ELF.h" @@ -182,16 +183,33 @@ class PPCTargetELFStreamer : public PPCTargetStreamer { void emitAssignment(MCSymbol *S, const MCExpr *Value) override { auto *Symbol = cast(S); + // When encoding an assignment to set symbol A to symbol B, also copy // the st_other bits encoding the local entry point offset. - if (Value->getKind() != MCExpr::SymbolRef) - return; - const auto &RhsSym = cast( - static_cast(Value)->getSymbol()); - unsigned Other = Symbol->getOther(); + if (copyLocalEntry(Symbol, Value)) + UpdateOther.insert(Symbol); + else + UpdateOther.erase(Symbol); + } + + void finish() override { + for (auto *Sym : UpdateOther) + copyLocalEntry(Sym, Sym->getVariableValue()); + } + +private: + SmallPtrSet UpdateOther; + + bool copyLocalEntry(MCSymbolELF *D, const MCExpr *S) { + auto *Ref = dyn_cast(S); + if (!Ref) + return false; + const auto &RhsSym = cast(Ref->getSymbol()); + unsigned Other = D->getOther(); Other &= ~ELF::STO_PPC64_LOCAL_MASK; Other |= RhsSym.getOther() & ELF::STO_PPC64_LOCAL_MASK; - Symbol->setOther(Other); + D->setOther(Other); + return true; } }; diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 31acd0ff870..70e9049a2ab 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -4359,8 +4359,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) { const Module *M = MF->getFunction().getParent(); if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 || - !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() || - M->getPICLevel() == PICLevel::SmallPIC) + (!TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt()) || + !PPCSubTarget->isTargetELF() || M->getPICLevel() == PICLevel::SmallPIC) break; SDValue Op = N->getOperand(1); diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index dd3f1ac7908..77aa4fe3d41 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -737,7 +737,9 @@ def abscondbrtarget : Operand { def calltarget : Operand { let PrintMethod = "printBranchOperand"; let EncoderMethod = "getDirectBrEncoding"; + let DecoderMethod = "DecodePCRel24BranchTarget"; let ParserMatchClass = PPCDirectBrAsmOperand; + let OperandType = "OPERAND_PCREL"; } def abscalltarget : Operand { let PrintMethod = "printAbsBranchOperand"; diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index c0cbfd779cb..1fdf74549de 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -138,6 +138,9 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (isDarwin()) HasLazyResolverStubs = true; + if (TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD()) + SecurePlt = true; + if (HasSPE && IsPPC64) report_fatal_error( "SPE is only supported for 32-bit targets.\n", false); if (HasSPE && (HasAltivec || HasQPX || HasVSX || HasFPU)) diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 33caa66154f..ad6ea3760fe 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -189,7 +189,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(SP::STDFri)) .addReg(FrameReg).addImm(0).addReg(SrcEvenReg); - replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg); + replaceFI(MF, *StMI, *StMI, dl, 0, Offset, FrameReg); MI.setDesc(TII.get(SP::STDFri)); MI.getOperand(2).setReg(SrcOddReg); Offset += 8; @@ -201,7 +201,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(SP::LDDFri), DestEvenReg) .addReg(FrameReg).addImm(0); - replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg); + replaceFI(MF, *StMI, *StMI, dl, 1, Offset, FrameReg); MI.setDesc(TII.get(SP::LDDFri)); MI.getOperand(0).setReg(DestOddReg); diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 003848e3422..f7f29d85cbb 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -669,13 +669,16 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, if (IsVarArg) { // Outgoing non-fixed arguments are placed in a buffer. First // compute their offsets and the total amount of buffer space needed. - for (SDValue Arg : - make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) { + for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) { + const ISD::OutputArg &Out = Outs[I]; + SDValue &Arg = OutVals[I]; EVT VT = Arg.getValueType(); assert(VT != MVT::iPTR && "Legalized args should be concrete"); Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + unsigned Align = std::max(Out.Flags.getOrigAlign(), + Layout.getABITypeAlignment(Ty)); unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), - Layout.getABITypeAlignment(Ty)); + Align); CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(), Offset, VT.getSimpleVT(), CCValAssign::Full)); diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 9dd3f265254..12cd613c34c 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -399,7 +399,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, case MVT::v2i64: case MVT::v8i16: case MVT::v16i8: - if (IsNonTemporal && Alignment >= 16) + if (IsNonTemporal && Alignment >= 16 && HasSSE41) Opc = HasVLX ? X86::VMOVNTDQAZ128rm : HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm; else if (Alignment >= 16) diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index afcb49dc226..217a12ddf89 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" @@ -512,6 +513,9 @@ void X86PassConfig::addPreEmitPass2() { // correct CFA calculation rule where needed by inserting appropriate CFI // instructions. const Triple &TT = TM->getTargetTriple(); - if (!TT.isOSDarwin() && !TT.isOSWindows()) + const MCAsmInfo *MAI = TM->getMCAsmInfo(); + if (!TT.isOSDarwin() && + (!TT.isOSWindows() || + MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI)) addPass(createCFIInstrInserter()); } diff --git a/test/CodeGen/AMDGPU/add.ll b/test/CodeGen/AMDGPU/add.ll index 6a108db879c..bce924ec4a0 100644 --- a/test/CodeGen/AMDGPU/add.ll +++ b/test/CodeGen/AMDGPU/add.ll @@ -1,11 +1,8 @@ ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}s_add_i32: -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - ; GCN: s_add_i32 s[[REG:[0-9]+]], {{s[0-9]+, s[0-9]+}} ; GCN: v_mov_b32_e32 v[[V_REG:[0-9]+]], s[[REG]] ; GCN: buffer_store_dword v[[V_REG]], @@ -19,9 +16,6 @@ define amdgpu_kernel void @s_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* % } ; FUNC-LABEL: {{^}}s_add_v2i32: -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - ; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} ; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} define amdgpu_kernel void @s_add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { @@ -34,11 +28,6 @@ define amdgpu_kernel void @s_add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> a } ; FUNC-LABEL: {{^}}s_add_v4i32: -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - ; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} ; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} ; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} @@ -53,15 +42,6 @@ define amdgpu_kernel void @s_add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> a } ; FUNC-LABEL: {{^}}s_add_v8i32: -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT - ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 @@ -78,23 +58,6 @@ entry: } ; FUNC-LABEL: {{^}}s_add_v16i32: -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT - ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 @@ -124,7 +87,7 @@ entry: ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, [[A]], [[B]] ; GFX9: v_add_u32_e32 v{{[0-9]+}}, [[A]], [[B]] define amdgpu_kernel void @v_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1 %a = load volatile i32, i32 addrspace(1)* %gep @@ -139,7 +102,7 @@ define amdgpu_kernel void @v_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* % ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, 0x7b, [[A]] ; GFX9: v_add_u32_e32 v{{[0-9]+}}, 0x7b, [[A]] define amdgpu_kernel void @v_add_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1 %a = load volatile i32, i32 addrspace(1)* %gep @@ -151,13 +114,6 @@ define amdgpu_kernel void @v_add_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1 ; FUNC-LABEL: {{^}}add64: ; GCN: s_add_u32 ; GCN: s_addc_u32 - -; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]] -; EG-DAG: ADD_INT {{[* ]*}} -; EG-DAG: ADDC_UINT -; EG-DAG: ADD_INT -; EG-DAG: ADD_INT {{[* ]*}} -; EG-NOT: SUB define amdgpu_kernel void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) { entry: %add = add i64 %a, %b @@ -172,13 +128,6 @@ entry: ; FUNC-LABEL: {{^}}add64_sgpr_vgpr: ; GCN-NOT: v_addc_u32_e32 s - -; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]] -; EG-DAG: ADD_INT {{[* ]*}} -; EG-DAG: ADDC_UINT -; EG-DAG: ADD_INT -; EG-DAG: ADD_INT {{[* ]*}} -; EG-NOT: SUB define amdgpu_kernel void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) { entry: %0 = load i64, i64 addrspace(1)* %in @@ -191,13 +140,6 @@ entry: ; FUNC-LABEL: {{^}}add64_in_branch: ; GCN: s_add_u32 ; GCN: s_addc_u32 - -; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]] -; EG-DAG: ADD_INT {{[* ]*}} -; EG-DAG: ADDC_UINT -; EG-DAG: ADD_INT -; EG-DAG: ADD_INT {{[* ]*}} -; EG-NOT: SUB define amdgpu_kernel void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) { entry: %0 = icmp eq i64 %a, 0 @@ -217,7 +159,26 @@ endif: ret void } -declare i32 @llvm.r600.read.tidig.x() #1 +; Make sure the VOP3 form of add is initially selected. Otherwise pair +; of opies from/to VCC would be necessary + +; GCN-LABEL: {{^}}add_select_vop3: +; SI: v_add_i32_e64 v0, s[0:1], s0, v0 +; VI: v_add_u32_e64 v0, s[0:1], s0, v0 +; GFX9: v_add_u32_e32 v0, s0, v0 + +; GCN: ; def vcc +; GCN: ds_write_b32 +; GCN: ; use vcc +define amdgpu_ps void @add_select_vop3(i32 inreg %s, i32 %v) { + %vcc = call i64 asm sideeffect "; def vcc", "={vcc}"() + %sub = add i32 %v, %s + store i32 %sub, i32 addrspace(3)* undef + call void asm sideeffect "; use vcc", "{vcc}"(i64 %vcc) + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #1 attributes #0 = { nounwind } attributes #1 = { nounwind readnone speculatable } diff --git a/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll b/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll index 5997e27fd81..e2c7f1c47cf 100644 --- a/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll +++ b/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll @@ -7,6 +7,8 @@ declare void @llvm.amdgcn.s.barrier() #1 ; Function Attrs: nounwind ; CHECK-LABEL: {{^}}signed_ds_offset_addressing_loop: +; SI: s_movk_i32 [[K_0X88:s[0-9]+]], 0x +; SI: s_movk_i32 [[K_0X100:s[0-9]+]], 0x100 ; CHECK: BB0_1: ; CHECK: v_add_i32_e32 [[VADDR:v[0-9]+]], ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] @@ -14,9 +16,9 @@ declare void @llvm.amdgcn.s.barrier() #1 ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR8]] ; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], vcc, 0x80, [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x80]] -; SI-DAG: v_add_i32_e32 [[VADDR0x88:v[0-9]+]], vcc, 0x88, [[VADDR]] +; SI-DAG: v_add_i32_e32 [[VADDR0x88:v[0-9]+]], vcc, [[K_0X88]], [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x88]] -; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], vcc, 0x100, [[VADDR]] +; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], vcc, [[K_0X100]], [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]] ; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:2 diff --git a/test/CodeGen/AMDGPU/fence-barrier.ll b/test/CodeGen/AMDGPU/fence-barrier.ll index 8f5a06d01fa..7de4f1796b0 100644 --- a/test/CodeGen/AMDGPU/fence-barrier.ll +++ b/test/CodeGen/AMDGPU/fence-barrier.ll @@ -54,7 +54,8 @@ define amdgpu_kernel void @test_local(i32 addrspace(1)*) { } ; GCN-LABEL: {{^}}test_global -; GCN: v_add_u32_e32 v{{[0-9]+}}, vcc, 0x888, v{{[0-9]+}} +; GCN: s_movk_i32 [[K:s[0-9]+]], 0x888 +; GCN: v_add_u32_e32 v{{[0-9]+}}, vcc, [[K]], v{{[0-9]+}} ; GCN: flat_store_dword ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} ; GCN-NEXT: s_barrier diff --git a/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir b/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir new file mode 100644 index 00000000000..ab544665efb --- /dev/null +++ b/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir @@ -0,0 +1,230 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination %s -o - | FileCheck -check-prefix=GCN %s + +--- + +# First operand is FI is in a VGPR, other operand is a VGPR +name: shrink_vgpr_fi_vgpr_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16 } +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: shrink_vgpr_fi_vgpr_v_add_i32_e64_no_carry_out_use + ; GCN: liveins: $vgpr0 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_MOV_B32_e32_]], [[COPY]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32 = COPY $vgpr0 + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... + +--- + +# First operand is a VGPR, other operand FI is in a VGPR +name: shrink_vgpr_vgpr_fi_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16 } +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: shrink_vgpr_vgpr_fi_v_add_i32_e64_no_carry_out_use + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[COPY]], [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... + +--- + +# First operand is FI is in an SGPR, other operand is a VGPR +name: shrink_vgpr_fi_sgpr_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16 } +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: shrink_vgpr_fi_sgpr_v_add_i32_e64_no_carry_out_use + ; GCN: liveins: $sgpr0 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]] + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:sreg_32_xm0 = COPY $sgpr0 + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... + +--- + +# First operand is an SGPR, other operand FI is in a VGPR +name: shrink_sgpr_vgpr_fi_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16 } +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: shrink_sgpr_vgpr_fi_v_add_i32_e64_no_carry_out_use + ; GCN: liveins: $sgpr0 + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[COPY]], implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]] + %0:sreg_32_xm0 = COPY $sgpr0 + %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... + +--- + +# First operand is FI is in an SGPR, other operand is a VGPR +name: shrink_sgpr_fi_vgpr_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16 } +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: shrink_sgpr_fi_vgpr_v_add_i32_e64_no_carry_out_use + ; GCN: liveins: $vgpr0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[COPY]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:sreg_32_xm0 = S_MOV_B32 %stack.0 + %1:vgpr_32 = COPY $vgpr0 + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... + +--- + +# First operand is a VGPR, other operand FI is in an SGPR +name: shrink_vgpr_sgpr_fi_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16} +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: shrink_vgpr_sgpr_fi_v_add_i32_e64_no_carry_out_use + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0 + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[COPY]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:vgpr_32 = COPY $vgpr0 + %1:sreg_32_xm0 = S_MOV_B32 %stack.0 + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... + +--- + +# First operand is FI is in a VGPR, other operand is an inline imm in a VGPR +name: shrink_vgpr_imm_fi_vgpr_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16 } +body: | + bb.0: + + ; GCN-LABEL: name: shrink_vgpr_imm_fi_vgpr_v_add_i32_e64_no_carry_out_use + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 16, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... + +--- + +# First operand is an inline imm in a VGPR, other operand FI is in a VGPR +name: shrink_vgpr_imm_vgpr_fi_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16 } +body: | + bb.0: + + ; GCN-LABEL: name: shrink_vgpr_imm_vgpr_fi_v_add_i32_e64_no_carry_out_use + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 16, [[V_MOV_B32_e32_]], implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]] + %0:vgpr_32 = V_MOV_B32_e32 16, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... + +--- + +# First operand is FI is in a VGPR, other operand is an literal constant in a VGPR +name: shrink_vgpr_k_fi_vgpr_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16 } +body: | + bb.0: + + ; GCN-LABEL: name: shrink_vgpr_k_fi_vgpr_v_add_i32_e64_no_carry_out_use + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 1234, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... + +--- + +# First operand is a literal constant in a VGPR, other operand FI is in a VGPR +name: shrink_vgpr_k_vgpr_fi_v_add_i32_e64_no_carry_out_use +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 64, alignment: 16 } +body: | + bb.0: + + ; GCN-LABEL: name: shrink_vgpr_k_vgpr_fi_v_add_i32_e64_no_carry_out_use + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 %stack.0, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_ENDPGM implicit %2 + +... diff --git a/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir b/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir index 847c2b720cd..15c453f36f6 100644 --- a/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir +++ b/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir @@ -250,8 +250,8 @@ body: | ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_sub_i32_e64_no_carry_out_use ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM implicit [[V_SUBREV_I32_e32_]] + ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_SUB_I32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, implicit $exec @@ -269,8 +269,8 @@ body: | ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_sub_i32_e64_no_carry_out_use ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 - ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM implicit [[V_SUB_I32_e32_]] + ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_SUBREV_I32_e32_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32_xm0 = S_MOV_B32 12345 %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, implicit $exec @@ -288,8 +288,8 @@ body: | ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_subrev_i32_e64_no_carry_out_use ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM implicit [[V_SUB_I32_e32_]] + ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_SUBREV_I32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, implicit $exec @@ -307,8 +307,8 @@ body: | ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_subrev_i32_e64_no_carry_out_use ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 - ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM implicit [[V_SUBREV_I32_e32_]] + ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_SUB_I32_e32_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32_xm0 = S_MOV_B32 12345 %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, implicit $exec @@ -590,3 +590,59 @@ body: | S_ENDPGM implicit %2 ... + +--- +name: shrink_add_kill_flags_src0 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: shrink_add_kill_flags_src0 + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 killed [[V_MOV_B32_e32_]], [[COPY]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec + %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 killed %1, %0, implicit $exec + S_ENDPGM implicit %2 +... + +--- +name: shrink_add_kill_flags_src1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: shrink_add_kill_flags_src1 + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_MOV_B32_e32_]], killed [[COPY]], implicit-def $vcc, implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec + %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 %1, killed %0, implicit $exec + S_ENDPGM implicit %2 +... + +--- +name: shrink_addc_kill_flags_src2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vcc + ; GCN-LABEL: name: shrink_addc_kill_flags_src2 + ; GCN: liveins: $vgpr0, $vcc + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec + ; GCN: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $vcc + ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[V_MOV_B32_e32_]], [[COPY]], [[COPY1]], implicit $exec + ; GCN: S_ENDPGM implicit [[V_ADDC_U32_e64_]] + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec + %2:sreg_64_xexec = COPY $vcc + %3:vgpr_32, %4:sreg_64_xexec = V_ADDC_U32_e64 %1, %0, %2, implicit $exec + S_ENDPGM implicit %3 +... diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll index 18abf607aea..77d518f503a 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll @@ -15,7 +15,8 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) } ; VI-LABEL: {{^}}dpp_test1: -; VI: v_add_u32_e32 [[REG:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}} +; VI-OPT: v_add_u32_e32 [[REG:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}} +; VI-NOOPT: v_add_u32_e64 [[REG:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}} ; VI-NOOPT: v_mov_b32_e32 v{{[0-9]+}}, 0 ; VI-NEXT: s_nop 0 ; VI-NEXT: s_nop 0 diff --git a/test/CodeGen/AMDGPU/r600.add.ll b/test/CodeGen/AMDGPU/r600.add.ll new file mode 100644 index 00000000000..73eea3ef217 --- /dev/null +++ b/test/CodeGen/AMDGPU/r600.add.ll @@ -0,0 +1,167 @@ +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}s_add_i32: +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +define amdgpu_kernel void @s_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { + %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 + %a = load i32, i32 addrspace(1)* %in + %b = load i32, i32 addrspace(1)* %b_ptr + %result = add i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_add_v2i32: +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +define amdgpu_kernel void @s_add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 + %a = load <2 x i32>, <2 x i32> addrspace(1)* %in + %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr + %result = add <2 x i32> %a, %b + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_add_v4i32: +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +define amdgpu_kernel void @s_add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32>, <4 x i32> addrspace(1)* %in + %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr + %result = add <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_add_v8i32: +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +define amdgpu_kernel void @s_add_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) { +entry: + %0 = add <8 x i32> %a, %b + store <8 x i32> %0, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_add_v16i32: +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +define amdgpu_kernel void @s_add_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) { +entry: + %0 = add <16 x i32> %a, %b + store <16 x i32> %0, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_add_i32: +define amdgpu_kernel void @v_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid + %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1 + %a = load volatile i32, i32 addrspace(1)* %gep + %b = load volatile i32, i32 addrspace(1)* %b_ptr + %result = add i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_add_imm_i32: +define amdgpu_kernel void @v_add_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid + %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1 + %a = load volatile i32, i32 addrspace(1)* %gep + %result = add i32 %a, 123 + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}add64: +; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]] +; EG-DAG: ADD_INT {{[* ]*}} +; EG-DAG: ADDC_UINT +; EG-DAG: ADD_INT +; EG-DAG: ADD_INT {{[* ]*}} +; EG-NOT: SUB +define amdgpu_kernel void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %add = add i64 %a, %b + store i64 %add, i64 addrspace(1)* %out + ret void +} + +; The v_addc_u32 and v_add_i32 instruction can't read SGPRs, because they +; use VCC. The test is designed so that %a will be stored in an SGPR and +; %0 will be stored in a VGPR, so the comiler will be forced to copy %a +; to a VGPR before doing the add. + +; FUNC-LABEL: {{^}}add64_sgpr_vgpr: +; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]] +; EG-DAG: ADD_INT {{[* ]*}} +; EG-DAG: ADDC_UINT +; EG-DAG: ADD_INT +; EG-DAG: ADD_INT {{[* ]*}} +; EG-NOT: SUB +define amdgpu_kernel void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) { +entry: + %0 = load i64, i64 addrspace(1)* %in + %1 = add i64 %a, %0 + store i64 %1, i64 addrspace(1)* %out + ret void +} + +; Test i64 add inside a branch. +; FUNC-LABEL: {{^}}add64_in_branch: +; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]] +; EG-DAG: ADD_INT {{[* ]*}} +; EG-DAG: ADDC_UINT +; EG-DAG: ADD_INT +; EG-DAG: ADD_INT {{[* ]*}} +; EG-NOT: SUB +define amdgpu_kernel void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) { +entry: + %0 = icmp eq i64 %a, 0 + br i1 %0, label %if, label %else + +if: + %1 = load i64, i64 addrspace(1)* %in + br label %endif + +else: + %2 = add i64 %a, %b + br label %endif + +endif: + %3 = phi i64 [%1, %if], [%2, %else] + store i64 %3, i64 addrspace(1)* %out + ret void +} + +declare i32 @llvm.r600.read.tidig.x() #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone speculatable } diff --git a/test/CodeGen/AMDGPU/r600.sub.ll b/test/CodeGen/AMDGPU/r600.sub.ll new file mode 100644 index 00000000000..2ded4f64328 --- /dev/null +++ b/test/CodeGen/AMDGPU/r600.sub.ll @@ -0,0 +1,152 @@ +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s + +declare i32 @llvm.r600.read.tidig.x() readnone + +; FUNC-LABEL: {{^}}s_sub_i32: +define amdgpu_kernel void @s_sub_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %result = sub i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_sub_imm_i32: +define amdgpu_kernel void @s_sub_imm_i32(i32 addrspace(1)* %out, i32 %a) { + %result = sub i32 1234, %a + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_sub_i32: +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +define amdgpu_kernel void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 + %a = load i32, i32 addrspace(1)* %in + %b = load i32, i32 addrspace(1)* %b_ptr + %result = sub i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_sub_imm_i32: +; EG: SUB_INT +define amdgpu_kernel void @test_sub_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %a = load i32, i32 addrspace(1)* %in + %result = sub i32 123, %a + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_sub_v2i32: +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +define amdgpu_kernel void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 + %a = load <2 x i32>, <2 x i32> addrspace(1) * %in + %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr + %result = sub <2 x i32> %a, %b + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_sub_v4i32: +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +define amdgpu_kernel void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32>, <4 x i32> addrspace(1) * %in + %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr + %result = sub <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_sub_i16: +define amdgpu_kernel void @test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) { + %tid = call i32 @llvm.r600.read.tidig.x() + %gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid + %b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i32 1 + %a = load volatile i16, i16 addrspace(1)* %gep + %b = load volatile i16, i16 addrspace(1)* %b_ptr + %result = sub i16 %a, %b + store i16 %result, i16 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_sub_v2i16: +define amdgpu_kernel void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { + %tid = call i32 @llvm.r600.read.tidig.x() + %gep = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid + %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1 + %a = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr + %result = sub <2 x i16> %a, %b + store <2 x i16> %result, <2 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_sub_v4i16: +define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { + %tid = call i32 @llvm.r600.read.tidig.x() + %gep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid + %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1 + %a = load <4 x i16>, <4 x i16> addrspace(1) * %gep + %b = load <4 x i16>, <4 x i16> addrspace(1) * %b_ptr + %result = sub <4 x i16> %a, %b + store <4 x i16> %result, <4 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_sub_i64: +; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY +; EG-DAG: SUB_INT {{[* ]*}} +; EG-DAG: SUBB_UINT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT {{[* ]*}} +define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind { + %result = sub i64 %a, %b + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}v_sub_i64: +; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY +; EG-DAG: SUB_INT {{[* ]*}} +; EG-DAG: SUBB_UINT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT {{[* ]*}} +define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() readnone + %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid + %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid + %a = load i64, i64 addrspace(1)* %a_ptr + %b = load i64, i64 addrspace(1)* %b_ptr + %result = sub i64 %a, %b + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}v_test_sub_v2i64: +define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) { + %tid = call i32 @llvm.r600.read.tidig.x() readnone + %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid + %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid + %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr + %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr + %result = sub <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_test_sub_v4i64: +define amdgpu_kernel void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* noalias %inA, <4 x i64> addrspace(1)* noalias %inB) { + %tid = call i32 @llvm.r600.read.tidig.x() readnone + %a_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inA, i32 %tid + %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inB, i32 %tid + %a = load <4 x i64>, <4 x i64> addrspace(1)* %a_ptr + %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr + %result = sub <4 x i64> %a, %b + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/AMDGPU/salu-to-valu.ll b/test/CodeGen/AMDGPU/salu-to-valu.ll index 9b46962108c..cf42ee9b39b 100644 --- a/test/CodeGen/AMDGPU/salu-to-valu.ll +++ b/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -458,7 +458,7 @@ bb7: ; preds = %bb3 } ; GCN-LABEL: {{^}}phi_visit_order: -; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 1, v{{[0-9]+}} +; GCN: v_add_i32_e64 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 1, v{{[0-9]+}} define amdgpu_kernel void @phi_visit_order() { bb: br label %bb1 diff --git a/test/CodeGen/AMDGPU/sub.ll b/test/CodeGen/AMDGPU/sub.ll index 4bd346dc586..485b374454d 100644 --- a/test/CodeGen/AMDGPU/sub.ll +++ b/test/CodeGen/AMDGPU/sub.ll @@ -1,11 +1,10 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s -declare i32 @llvm.r600.read.tidig.x() readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable -; FUNC-LABEL: {{^}}s_sub_i32: +; GCN-LABEL: {{^}}s_sub_i32: ; GCN: s_load_dwordx2 ; GCN: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}} ; GCN: s_sub_i32 s{{[0-9]+}}, s[[A]], s[[B]] @@ -15,7 +14,7 @@ define amdgpu_kernel void @s_sub_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { ret void } -; FUNC-LABEL: {{^}}s_sub_imm_i32: +; GCN-LABEL: {{^}}s_sub_imm_i32: ; GCN: s_load_dword [[A:s[0-9]+]] ; GCN: s_sub_i32 s{{[0-9]+}}, 0x4d2, [[A]] define amdgpu_kernel void @s_sub_imm_i32(i32 addrspace(1)* %out, i32 %a) { @@ -24,9 +23,7 @@ define amdgpu_kernel void @s_sub_imm_i32(i32 addrspace(1)* %out, i32 %a) { ret void } -; FUNC-LABEL: {{^}}test_sub_i32: -; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - +; GCN-LABEL: {{^}}test_sub_i32: ; SI: v_subrev_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} ; GFX9: v_sub_u32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define amdgpu_kernel void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { @@ -38,9 +35,7 @@ define amdgpu_kernel void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1) ret void } -; FUNC-LABEL: {{^}}test_sub_imm_i32: -; EG: SUB_INT - +; GCN-LABEL: {{^}}test_sub_imm_i32: ; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, 0x7b, v{{[0-9]+}} ; GFX9: v_sub_u32_e32 v{{[0-9]+}}, 0x7b, v{{[0-9]+}} define amdgpu_kernel void @test_sub_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { @@ -50,10 +45,7 @@ define amdgpu_kernel void @test_sub_imm_i32(i32 addrspace(1)* %out, i32 addrspac ret void } -; FUNC-LABEL: {{^}}test_sub_v2i32: -; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - +; GCN-LABEL: {{^}}test_sub_v2i32: ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} @@ -68,12 +60,7 @@ define amdgpu_kernel void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32 ret void } -; FUNC-LABEL: {{^}}test_sub_v4i32: -; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - +; GCN-LABEL: {{^}}test_sub_v4i32: ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} @@ -92,11 +79,11 @@ define amdgpu_kernel void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32 ret void } -; FUNC-LABEL: {{^}}test_sub_i16: +; GCN-LABEL: {{^}}test_sub_i16: ; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, ; GFX89: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define amdgpu_kernel void @test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid %b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i32 1 %a = load volatile i16, i16 addrspace(1)* %gep @@ -106,13 +93,13 @@ define amdgpu_kernel void @test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1) ret void } -; FUNC-LABEL: {{^}}test_sub_v2i16: +; GCN-LABEL: {{^}}test_sub_v2i16: ; VI: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ; VI: v_sub_u16_sdwa v{{[0-9]+, v[0-9]+, v[0-9]+}} ; GFX9: v_pk_sub_i16 define amdgpu_kernel void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1 %a = load <2 x i16>, <2 x i16> addrspace(1)* %gep @@ -122,7 +109,7 @@ define amdgpu_kernel void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16 ret void } -; FUNC-LABEL: {{^}}test_sub_v4i16: +; GCN-LABEL: {{^}}test_sub_v4i16: ; VI: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ; VI: v_sub_u16_sdwa v{{[0-9]+, v[0-9]+, v[0-9]+}} ; VI: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} @@ -131,7 +118,7 @@ define amdgpu_kernel void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16 ; GFX9: v_pk_sub_i16 ; GFX9: v_pk_sub_i16 define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1 %a = load <4 x i16>, <4 x i16> addrspace(1) * %gep @@ -141,22 +128,16 @@ define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16 ret void } -; FUNC-LABEL: {{^}}s_sub_i64: +; GCN-LABEL: {{^}}s_sub_i64: ; GCN: s_sub_u32 ; GCN: s_subb_u32 - -; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY -; EG-DAG: SUB_INT {{[* ]*}} -; EG-DAG: SUBB_UINT -; EG-DAG: SUB_INT -; EG-DAG: SUB_INT {{[* ]*}} define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind { %result = sub i64 %a, %b store i64 %result, i64 addrspace(1)* %out, align 8 ret void } -; FUNC-LABEL: {{^}}v_sub_i64: +; GCN-LABEL: {{^}}v_sub_i64: ; SI: v_sub_i32_e32 ; SI: v_subb_u32_e32 @@ -165,14 +146,8 @@ define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 ; GFX9: v_sub_co_u32_e32 ; GFX9: v_subb_co_u32_e32 - -; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY -; EG-DAG: SUB_INT {{[* ]*}} -; EG-DAG: SUBB_UINT -; EG-DAG: SUB_INT -; EG-DAG: SUB_INT {{[* ]*}} define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid %a = load i64, i64 addrspace(1)* %a_ptr @@ -182,7 +157,7 @@ define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspa ret void } -; FUNC-LABEL: {{^}}v_test_sub_v2i64: +; GCN-LABEL: {{^}}v_test_sub_v2i64: ; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, ; SI: v_subb_u32_e32 v{{[0-9]+}}, vcc, ; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, @@ -198,7 +173,7 @@ define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspa ; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, ; GFX9: v_subb_co_u32_e32 v{{[0-9]+}}, vcc, define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) { - %tid = call i32 @llvm.r600.read.tidig.x() readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr @@ -208,7 +183,7 @@ define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i ret void } -; FUNC-LABEL: {{^}}v_test_sub_v4i64: +; GCN-LABEL: {{^}}v_test_sub_v4i64: ; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, ; SI: v_subb_u32_e32 v{{[0-9]+}}, vcc, ; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, @@ -236,7 +211,7 @@ define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i ; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, ; GFX9: v_subb_co_u32_e32 v{{[0-9]+}}, vcc, define amdgpu_kernel void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* noalias %inA, <4 x i64> addrspace(1)* noalias %inB) { - %tid = call i32 @llvm.r600.read.tidig.x() readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone %a_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inA, i32 %tid %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inB, i32 %tid %a = load <4 x i64>, <4 x i64> addrspace(1)* %a_ptr @@ -245,3 +220,22 @@ define amdgpu_kernel void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i store <4 x i64> %result, <4 x i64> addrspace(1)* %out ret void } + +; Make sure the VOP3 form of sub is initially selected. Otherwise pair +; of opies from/to VCC would be necessary + +; GCN-LABEL: {{^}}sub_select_vop3: +; SI: v_subrev_i32_e64 v0, s[0:1], s0, v0 +; VI: v_subrev_u32_e64 v0, s[0:1], s0, v0 +; GFX9: v_subrev_u32_e32 v0, s0, v0 + +; GCN: ; def vcc +; GCN: ds_write_b32 +; GCN: ; use vcc +define amdgpu_ps void @sub_select_vop3(i32 inreg %s, i32 %v) { + %vcc = call i64 asm sideeffect "; def vcc", "={vcc}"() + %sub = sub i32 %v, %s + store i32 %sub, i32 addrspace(3)* undef + call void asm sideeffect "; use vcc", "{vcc}"(i64 %vcc) + ret void +} diff --git a/test/CodeGen/ARM/tail-call-scheduling.ll b/test/CodeGen/ARM/tail-call-scheduling.ll new file mode 100644 index 00000000000..591da10256b --- /dev/null +++ b/test/CodeGen/ARM/tail-call-scheduling.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s | FileCheck %s +target triple = "armv6kz-unknown-unknown-gnueabihf" + +; Make sure this doesn't crash, and we actually emit a tail call. +; Unfortunately, this test is sort of fragile... the original issue only +; shows up if scheduling happens in a very specific order. But including +; it anyway just to demonstrate the issue. +; CHECK: pop {r4, lr} + +@e = external local_unnamed_addr constant [0 x i32 (i32, i32)*], align 4 + +; Function Attrs: nounwind sspstrong +define i32 @AVI_ChunkRead_p_chk(i32 %g) nounwind sspstrong "target-cpu"="arm1176jzf-s" { +entry: + %b = alloca i8, align 1 + %tobool = icmp eq i32 %g, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %add = add nsw i32 %g, 1 + %arrayidx = getelementptr inbounds [0 x i32 (i32, i32)*], [0 x i32 (i32, i32)*]* @e, i32 0, i32 %add + %0 = load i32 (i32, i32)*, i32 (i32, i32)** %arrayidx, align 4 + %call = tail call i32 %0(i32 0, i32 0) #3 + br label %return + +if.end: ; preds = %entry + call void @c(i8* nonnull %b) + br label %return + +return: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ %call, %if.then ], [ 0, %if.end ] + ret i32 %retval.0 +} + +declare void @c(i8*) diff --git a/test/CodeGen/AVR/mul.ll b/test/CodeGen/AVR/hardware-mul.ll similarity index 90% rename from test/CodeGen/AVR/mul.ll rename to test/CodeGen/AVR/hardware-mul.ll index 2f169347c46..650697857b7 100644 --- a/test/CodeGen/AVR/mul.ll +++ b/test/CodeGen/AVR/hardware-mul.ll @@ -1,5 +1,7 @@ ; RUN: llc -mattr=mul,movw < %s -march=avr | FileCheck %s +; Tests lowering of multiplication to hardware instructions. + define i8 @mult8(i8 %a, i8 %b) { ; CHECK-LABEL: mult8: ; CHECK: muls r22, r24 diff --git a/test/CodeGen/AVR/smul-with-overflow.ll b/test/CodeGen/AVR/smul-with-overflow.ll index 745e93005cc..9eb2c7411de 100644 --- a/test/CodeGen/AVR/smul-with-overflow.ll +++ b/test/CodeGen/AVR/smul-with-overflow.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=avr | FileCheck %s +; RUN: llc -mattr=avr6 < %s -march=avr | FileCheck %s define i1 @signed_multiplication_did_overflow(i8, i8) unnamed_addr { ; CHECK-LABEL: signed_multiplication_did_overflow: diff --git a/test/CodeGen/AVR/software-mul.ll b/test/CodeGen/AVR/software-mul.ll new file mode 100644 index 00000000000..9a4d28127eb --- /dev/null +++ b/test/CodeGen/AVR/software-mul.ll @@ -0,0 +1,28 @@ +; RUN: llc -mattr=avr6,-mul < %s -march=avr | FileCheck %s +; RUN: llc -mcpu=attiny85 < %s -march=avr | FileCheck %s +; RUN: llc -mcpu=ata5272 < %s -march=avr | FileCheck %s +; RUN: llc -mcpu=attiny861a < %s -march=avr | FileCheck %s +; RUN: llc -mcpu=at90usb82 < %s -march=avr | FileCheck %s + +; Tests lowering of multiplication to compiler support routines. + +; CHECK-LABEL: mul8: +define i8 @mul8(i8 %a, i8 %b) { +; CHECK: mov r25, r24 +; CHECK: mov r24, r22 +; CHECK: mov r22, r25 +; CHECK: call __mulqi3 + %mul = mul i8 %b, %a + ret i8 %mul +} + +; CHECK-LABEL: mul16: +define i16 @mul16(i16 %a, i16 %b) { +; CHECK: movw r18, r24 +; CHECK: movw r24, r22 +; CHECK: movw r22, r18 +; CHECK: call __mulhi3 + %mul = mul nsw i16 %b, %a + ret i16 %mul +} + diff --git a/test/CodeGen/AVR/umul-with-overflow.ll b/test/CodeGen/AVR/umul-with-overflow.ll index aa8b10a313d..c6457552dea 100644 --- a/test/CodeGen/AVR/umul-with-overflow.ll +++ b/test/CodeGen/AVR/umul-with-overflow.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=avr | FileCheck %s +; RUN: llc -mattr=avr6 < %s -march=avr | FileCheck %s define i1 @unsigned_multiplication_did_overflow(i8, i8) unnamed_addr { ; CHECK-LABEL: unsigned_multiplication_did_overflow: diff --git a/test/CodeGen/Mips/Fast-ISel/icmpbr1.ll b/test/CodeGen/Mips/Fast-ISel/icmpbr1.ll index ef8e1c2b014..e44ab36532c 100644 --- a/test/CodeGen/Mips/Fast-ISel/icmpbr1.ll +++ b/test/CodeGen/Mips/Fast-ISel/icmpbr1.ll @@ -17,7 +17,8 @@ bb0: bb1: ; CHECK: # %bb.1: # %bb1 ; CHECK-NEXT: lw $[[REG2:[0-9]+]], [[SPILL]]($sp) # 4-byte Folded Reload -; CHECK-NEXT: bgtz $[[REG2]], $BB0_3 +; CHECK-NEXT: andi $[[REG3:[0-9]+]], $[[REG2]], 1 +; CHECK-NEXT: bgtz $[[REG3]], $BB0_3 br i1 %2, label %bb2, label %bb3 bb2: ; CHECK: $BB0_3: # %bb2 diff --git a/test/CodeGen/Mips/Fast-ISel/pr40325.ll b/test/CodeGen/Mips/Fast-ISel/pr40325.ll new file mode 100644 index 00000000000..a9ce70fe8af --- /dev/null +++ b/test/CodeGen/Mips/Fast-ISel/pr40325.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=mipsel -relocation-model=pic -O0 -mcpu=mips32 < %s | FileCheck %s + +define void @test(i32 %x, i1* %p) nounwind { +; CHECK-LABEL: test: +; CHECK: # %bb.0: +; CHECK-NEXT: move $1, $4 +; CHECK-NEXT: andi $4, $4, 1 +; CHECK-NEXT: sb $4, 0($5) +; CHECK-NEXT: andi $1, $1, 1 +; CHECK-NEXT: bgtz $1, $BB0_1 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %foo +; CHECK-NEXT: jr $ra +; CHECK-NEXT: nop + %y = and i32 %x, 1 + %c = icmp eq i32 %y, 1 + store i1 %c, i1* %p + br i1 %c, label %foo, label %foo + +foo: + ret void +} diff --git a/test/CodeGen/Mips/abiflags32.ll b/test/CodeGen/Mips/abiflags32.ll index 39e2a90151e..65201ec0381 100644 --- a/test/CodeGen/Mips/abiflags32.ll +++ b/test/CodeGen/Mips/abiflags32.ll @@ -1,6 +1,12 @@ ; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | FileCheck %s ; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -mattr=fp64 %s -o - | FileCheck -check-prefix=CHECK-64 %s ; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips64 -target-abi n32 %s -o - | FileCheck -check-prefix=CHECK-64n %s +; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 \ +; RUN: -mattr=soft-float %s -o - | FileCheck -check-prefix=SOFT %s +; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32r6 \ +; RUN: -mattr=soft-float %s -o - | FileCheck -check-prefix=SOFT %s +; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips64 \ +; RUN: -mattr=soft-float -target-abi n64 %s -o - | FileCheck -check-prefix=SOFT %s ; CHECK: .nan legacy ; We don't emit '.module fp=32' for compatibility with binutils 2.24 which @@ -15,3 +21,5 @@ ; We don't emit '.module fp=64' for compatibility with binutils 2.24 which ; doesn't accept .module. ; CHECK-64n-NOT: .module fp=64 + +; SOFT: .module softfloat diff --git a/test/CodeGen/Mips/llvm-ir/fptosi.ll b/test/CodeGen/Mips/llvm-ir/fptosi.ll new file mode 100644 index 00000000000..03a0de74664 --- /dev/null +++ b/test/CodeGen/Mips/llvm-ir/fptosi.ll @@ -0,0 +1,418 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M32 +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r2 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M32 +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r2 -mattr=+fp64 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M32R2-FP64 +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r2 -mattr=+soft-float -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M32R2-SF +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r3 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M32R3R5 +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r5 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M32R3R5 +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r6 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M32R6 +; RUN: llc < %s -mtriple=mips64-linux-gnu -mcpu=mips3 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M64 +; RUN: llc < %s -mtriple=mips64-linux-gnu -mcpu=mips64 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M64 +; RUN: llc < %s -mtriple=mips64-linux-gnu -mcpu=mips64r2 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M64 +; RUN: llc < %s -mtriple=mips64-linux-gnu -mcpu=mips64r6 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=M64R6 +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r2 -mattr=+micromips -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=MMR2-FP32 +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r2 -mattr=+micromips,fp64 -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=MMR2-FP64 +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r2 -mattr=+micromips,soft-float -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=MMR2-SF +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r6 -mattr=+micromips -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=MMR6 +; RUN: llc < %s -mtriple=mips-linux-gnu -mcpu=mips32r6 -mattr=+micromips,soft-float -asm-show-inst |\ +; RUN: FileCheck %s -check-prefixes=MMR6-SF + +; Test that fptosi can be matched for MIPS targets for various FPU +; configurations + +define i32 @test1(float %t) { +; M32-LABEL: test1: +; M32: # %bb.0: # %entry +; M32-NEXT: trunc.w.s $f0, $f12 # +; M32-NEXT: # > +; M32-NEXT: jr $ra # > +; M32-NEXT: mfc1 $2, $f0 # +; M32-NEXT: # > +; +; M32R2-FP64-LABEL: test1: +; M32R2-FP64: # %bb.0: # %entry +; M32R2-FP64-NEXT: trunc.w.s $f0, $f12 # +; M32R2-FP64-NEXT: # > +; M32R2-FP64-NEXT: jr $ra # > +; M32R2-FP64-NEXT: mfc1 $2, $f0 # +; M32R2-FP64-NEXT: # > +; +; M32R2-SF-LABEL: test1: +; M32R2-SF: # %bb.0: # %entry +; M32R2-SF-NEXT: addiu $sp, $sp, -24 # +; M32R2-SF-NEXT: # +; M32R2-SF-NEXT: # > +; M32R2-SF-NEXT: .cfi_def_cfa_offset 24 +; M32R2-SF-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; M32R2-SF-NEXT: # +; M32R2-SF-NEXT: # +; M32R2-SF-NEXT: # > +; M32R2-SF-NEXT: .cfi_offset 31, -4 +; M32R2-SF-NEXT: jal __fixsfsi # > +; M32R2-SF-NEXT: nop # +; M32R2-SF-NEXT: # +; M32R2-SF-NEXT: # > +; M32R2-SF-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; M32R2-SF-NEXT: # +; M32R2-SF-NEXT: # +; M32R2-SF-NEXT: # > +; M32R2-SF-NEXT: jr $ra # > +; M32R2-SF-NEXT: addiu $sp, $sp, 24 # +; M32R2-SF-NEXT: # +; M32R2-SF-NEXT: # > +; +; M32R3R5-LABEL: test1: +; M32R3R5: # %bb.0: # %entry +; M32R3R5-NEXT: trunc.w.s $f0, $f12 # +; M32R3R5-NEXT: # > +; M32R3R5-NEXT: jr $ra # > +; M32R3R5-NEXT: mfc1 $2, $f0 # +; M32R3R5-NEXT: # > +; +; M32R6-LABEL: test1: +; M32R6: # %bb.0: # %entry +; M32R6-NEXT: trunc.w.s $f0, $f12 # +; M32R6-NEXT: # > +; M32R6-NEXT: jr $ra # +; M32R6-NEXT: # > +; M32R6-NEXT: mfc1 $2, $f0 # +; M32R6-NEXT: # > +; +; M64-LABEL: test1: +; M64: # %bb.0: # %entry +; M64-NEXT: trunc.w.s $f0, $f12 # +; M64-NEXT: # > +; M64-NEXT: jr $ra # > +; M64-NEXT: mfc1 $2, $f0 # +; M64-NEXT: # > +; +; M64R6-LABEL: test1: +; M64R6: # %bb.0: # %entry +; M64R6-NEXT: trunc.w.s $f0, $f12 # +; M64R6-NEXT: # > +; M64R6-NEXT: jr $ra # +; M64R6-NEXT: # > +; M64R6-NEXT: mfc1 $2, $f0 # +; M64R6-NEXT: # > +; +; MMR2-FP32-LABEL: test1: +; MMR2-FP32: # %bb.0: # %entry +; MMR2-FP32-NEXT: trunc.w.s $f0, $f12 # +; MMR2-FP32-NEXT: # > +; MMR2-FP32-NEXT: jr $ra # > +; MMR2-FP32-NEXT: mfc1 $2, $f0 # +; MMR2-FP32-NEXT: # > +; +; MMR2-FP64-LABEL: test1: +; MMR2-FP64: # %bb.0: # %entry +; MMR2-FP64-NEXT: trunc.w.s $f0, $f12 # +; MMR2-FP64-NEXT: # > +; MMR2-FP64-NEXT: jr $ra # > +; MMR2-FP64-NEXT: mfc1 $2, $f0 # +; MMR2-FP64-NEXT: # > +; +; MMR2-SF-LABEL: test1: +; MMR2-SF: # %bb.0: # %entry +; MMR2-SF-NEXT: addiusp -24 # > +; MMR2-SF-NEXT: .cfi_def_cfa_offset 24 +; MMR2-SF-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MMR2-SF-NEXT: # +; MMR2-SF-NEXT: # +; MMR2-SF-NEXT: # > +; MMR2-SF-NEXT: .cfi_offset 31, -4 +; MMR2-SF-NEXT: jal __fixsfsi # > +; MMR2-SF-NEXT: nop # +; MMR2-SF-NEXT: # +; MMR2-SF-NEXT: # > +; MMR2-SF-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MMR2-SF-NEXT: # +; MMR2-SF-NEXT: # +; MMR2-SF-NEXT: # > +; MMR2-SF-NEXT: addiusp 24 # > +; MMR2-SF-NEXT: jrc $ra # > +; +; MMR6-LABEL: test1: +; MMR6: # %bb.0: # %entry +; MMR6-NEXT: trunc.w.s $f0, $f12 # +; MMR6-NEXT: # > +; MMR6-NEXT: mfc1 $2, $f0 # +; MMR6-NEXT: # > +; MMR6-NEXT: jrc $ra # > +; +; MMR6-SF-LABEL: test1: +; MMR6-SF: # %bb.0: # %entry +; MMR6-SF-NEXT: addiu $sp, $sp, -24 # +; MMR6-SF-NEXT: # +; MMR6-SF-NEXT: # > +; MMR6-SF-NEXT: .cfi_def_cfa_offset 24 +; MMR6-SF-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MMR6-SF-NEXT: # +; MMR6-SF-NEXT: # +; MMR6-SF-NEXT: # > +; MMR6-SF-NEXT: .cfi_offset 31, -4 +; MMR6-SF-NEXT: jalr __fixsfsi # > +; MMR6-SF-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MMR6-SF-NEXT: # +; MMR6-SF-NEXT: # +; MMR6-SF-NEXT: # > +; MMR6-SF-NEXT: addiu $sp, $sp, 24 # +; MMR6-SF-NEXT: # +; MMR6-SF-NEXT: # > +; MMR6-SF-NEXT: jrc $ra # > +entry: + %conv = fptosi float %t to i32 + ret i32 %conv +} + +define i32 @test2(double %t) { +; M32-LABEL: test2: +; M32: # %bb.0: # %entry +; M32-NEXT: trunc.w.d $f0, $f12 # +; M32-NEXT: # > +; M32-NEXT: jr $ra # > +; M32-NEXT: mfc1 $2, $f0 # +; M32-NEXT: # > +; +; M32R2-FP64-LABEL: test2: +; M32R2-FP64: # %bb.0: # %entry +; M32R2-FP64-NEXT: trunc.w.d $f0, $f12 # +; M32R2-FP64-NEXT: # > +; M32R2-FP64-NEXT: jr $ra # > +; M32R2-FP64-NEXT: mfc1 $2, $f0 # +; M32R2-FP64-NEXT: # > +; +; M32R2-SF-LABEL: test2: +; M32R2-SF: # %bb.0: # %entry +; M32R2-SF-NEXT: addiu $sp, $sp, -24 # +; M32R2-SF-NEXT: # +; M32R2-SF-NEXT: # > +; M32R2-SF-NEXT: .cfi_def_cfa_offset 24 +; M32R2-SF-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; M32R2-SF-NEXT: # +; M32R2-SF-NEXT: # +; M32R2-SF-NEXT: # > +; M32R2-SF-NEXT: .cfi_offset 31, -4 +; M32R2-SF-NEXT: jal __fixdfsi # > +; M32R2-SF-NEXT: nop # +; M32R2-SF-NEXT: # +; M32R2-SF-NEXT: # > +; M32R2-SF-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; M32R2-SF-NEXT: # +; M32R2-SF-NEXT: # +; M32R2-SF-NEXT: # > +; M32R2-SF-NEXT: jr $ra # > +; M32R2-SF-NEXT: addiu $sp, $sp, 24 # +; M32R2-SF-NEXT: # +; M32R2-SF-NEXT: # > +; +; M32R3R5-LABEL: test2: +; M32R3R5: # %bb.0: # %entry +; M32R3R5-NEXT: trunc.w.d $f0, $f12 # +; M32R3R5-NEXT: # > +; M32R3R5-NEXT: jr $ra # > +; M32R3R5-NEXT: mfc1 $2, $f0 # +; M32R3R5-NEXT: # > +; +; M32R6-LABEL: test2: +; M32R6: # %bb.0: # %entry +; M32R6-NEXT: trunc.w.d $f0, $f12 # +; M32R6-NEXT: # > +; M32R6-NEXT: jr $ra # +; M32R6-NEXT: # > +; M32R6-NEXT: mfc1 $2, $f0 # +; M32R6-NEXT: # > +; +; M64-LABEL: test2: +; M64: # %bb.0: # %entry +; M64-NEXT: trunc.w.d $f0, $f12 # +; M64-NEXT: # > +; M64-NEXT: jr $ra # > +; M64-NEXT: mfc1 $2, $f0 # +; M64-NEXT: # > +; +; M64R6-LABEL: test2: +; M64R6: # %bb.0: # %entry +; M64R6-NEXT: trunc.w.d $f0, $f12 # +; M64R6-NEXT: # > +; M64R6-NEXT: jr $ra # +; M64R6-NEXT: # > +; M64R6-NEXT: mfc1 $2, $f0 # +; M64R6-NEXT: # > +; +; MMR2-FP32-LABEL: test2: +; MMR2-FP32: # %bb.0: # %entry +; MMR2-FP32-NEXT: trunc.w.d $f0, $f12 # +; MMR2-FP32-NEXT: # > +; MMR2-FP32-NEXT: jr $ra # > +; MMR2-FP32-NEXT: mfc1 $2, $f0 # +; MMR2-FP32-NEXT: # > +; +; MMR2-FP64-LABEL: test2: +; MMR2-FP64: # %bb.0: # %entry +; MMR2-FP64-NEXT: cvt.w.d $f0, $f12 # +; MMR2-FP64-NEXT: # > +; MMR2-FP64-NEXT: jr $ra # > +; MMR2-FP64-NEXT: mfc1 $2, $f0 # +; MMR2-FP64-NEXT: # > +; +; MMR2-SF-LABEL: test2: +; MMR2-SF: # %bb.0: # %entry +; MMR2-SF-NEXT: addiusp -24 # > +; MMR2-SF-NEXT: .cfi_def_cfa_offset 24 +; MMR2-SF-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MMR2-SF-NEXT: # +; MMR2-SF-NEXT: # +; MMR2-SF-NEXT: # > +; MMR2-SF-NEXT: .cfi_offset 31, -4 +; MMR2-SF-NEXT: jal __fixdfsi # > +; MMR2-SF-NEXT: nop # +; MMR2-SF-NEXT: # +; MMR2-SF-NEXT: # > +; MMR2-SF-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MMR2-SF-NEXT: # +; MMR2-SF-NEXT: # +; MMR2-SF-NEXT: # > +; MMR2-SF-NEXT: addiusp 24 # > +; MMR2-SF-NEXT: jrc $ra # > +; +; MMR6-LABEL: test2: +; MMR6: # %bb.0: # %entry +; MMR6-NEXT: trunc.w.d $f0, $f12 # +; MMR6-NEXT: # > +; MMR6-NEXT: mfc1 $2, $f0 # +; MMR6-NEXT: # > +; MMR6-NEXT: jrc $ra # > +; +; MMR6-SF-LABEL: test2: +; MMR6-SF: # %bb.0: # %entry +; MMR6-SF-NEXT: addiu $sp, $sp, -24 # +; MMR6-SF-NEXT: # +; MMR6-SF-NEXT: # > +; MMR6-SF-NEXT: .cfi_def_cfa_offset 24 +; MMR6-SF-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MMR6-SF-NEXT: # +; MMR6-SF-NEXT: # +; MMR6-SF-NEXT: # > +; MMR6-SF-NEXT: .cfi_offset 31, -4 +; MMR6-SF-NEXT: jalr __fixdfsi # > +; MMR6-SF-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MMR6-SF-NEXT: # +; MMR6-SF-NEXT: # +; MMR6-SF-NEXT: # > +; MMR6-SF-NEXT: addiu $sp, $sp, 24 # +; MMR6-SF-NEXT: # +; MMR6-SF-NEXT: # > +; MMR6-SF-NEXT: jrc $ra # > +entry: + %conv = fptosi double %t to i32 + ret i32 %conv +} diff --git a/test/CodeGen/Mips/micromips-pseudo-mtlohi-expand.ll b/test/CodeGen/Mips/micromips-pseudo-mtlohi-expand.ll new file mode 100644 index 00000000000..3f86bd24f34 --- /dev/null +++ b/test/CodeGen/Mips/micromips-pseudo-mtlohi-expand.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r2 -mattr=+micromips -asm-show-inst < %s |\ +; RUN: FileCheck %s -check-prefixes=MMR2 +; RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r2 -mattr=+dsp,+micromips -asm-show-inst < %s |\ +; RUN: FileCheck %s -check-prefixes=MMR2-DSP + +define i64 @test(i32 signext %a, i32 signext %b) { +; MMR2-LABEL: test: +; MMR2: # %bb.0: # %entry +; MMR2-NEXT: li16 $2, 0 # +; MMR2-NEXT: # > +; MMR2-NEXT: li16 $3, 1 # +; MMR2-NEXT: # > +; MMR2-NEXT: mtlo $3 # > +; MMR2-NEXT: mthi $2 # > +; MMR2-NEXT: madd $4, $5 # +; MMR2-NEXT: # > +; MMR2-NEXT: mflo16 $2 # > +; MMR2-NEXT: mfhi16 $3 # > +; MMR2-NEXT: jrc $ra # > +; +; MMR2-DSP-LABEL: test: +; MMR2-DSP: # %bb.0: # %entry +; MMR2-DSP-NEXT: li16 $2, 0 # +; MMR2-DSP-NEXT: # > +; MMR2-DSP-NEXT: li16 $3, 1 # +; MMR2-DSP-NEXT: # > +; MMR2-DSP-NEXT: mtlo $3, $ac0 # +; MMR2-DSP-NEXT: # > +; MMR2-DSP-NEXT: mthi $2, $ac0 # +; MMR2-DSP-NEXT: # > +; MMR2-DSP-NEXT: madd $ac0, $4, $5 # +; MMR2-DSP-NEXT: # +; MMR2-DSP-NEXT: # +; MMR2-DSP-NEXT: # > +; MMR2-DSP-NEXT: mflo $2, $ac0 # +; MMR2-DSP-NEXT: # > +; MMR2-DSP-NEXT: jr $ra # > +; MMR2-DSP-NEXT: mfhi $3, $ac0 # +; MMR2-DSP-NEXT: # > +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %mul = mul nsw i64 %conv, %conv1 + %add = add nsw i64 %mul, 1 + ret i64 %add +} diff --git a/test/CodeGen/Mips/pseudo-jump-fill.ll b/test/CodeGen/Mips/pseudo-jump-fill.ll new file mode 100644 index 00000000000..31f077d57a9 --- /dev/null +++ b/test/CodeGen/Mips/pseudo-jump-fill.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=mipsel-linux-gnu -mattr=+micromips -relocation-model=pic < %s | FileCheck %s + +; Test that the delay slot filler correctly handles indirect branches for +; microMIPS in regard to incorrectly using 16bit instructions in delay slots of +; 32bit instructions. + +define i32 @test(i32 signext %x, i32 signext %c) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui $2, %hi(_gp_disp) +; CHECK-NEXT: addiu $2, $2, %lo(_gp_disp) +; CHECK-NEXT: addiur2 $5, $5, -1 +; CHECK-NEXT: sltiu $1, $5, 4 +; CHECK-NEXT: beqz $1, $BB0_3 +; CHECK-NEXT: addu $3, $2, $25 +; CHECK-NEXT: $BB0_1: # %entry +; CHECK-NEXT: li16 $2, 0 +; CHECK-NEXT: sll16 $5, $5, 2 +; CHECK-NEXT: lw $6, %got($JTI0_0)($3) +; CHECK-NEXT: addu16 $5, $5, $6 +; CHECK-NEXT: lw $5, %lo($JTI0_0)($5) +; CHECK-NEXT: addu16 $3, $5, $3 +; CHECK-NEXT: jr $3 +; CHECK-NEXT: nop +; CHECK-NEXT: $BB0_2: # %sw.bb2 +; CHECK-NEXT: addiur2 $2, $4, 1 +; CHECK-NEXT: jrc $ra +; CHECK-NEXT: $BB0_3: +; CHECK-NEXT: move $2, $4 +; CHECK-NEXT: jrc $ra +; CHECK-NEXT: $BB0_4: # %sw.bb3 +; CHECK-NEXT: addius5 $4, 2 +; CHECK-NEXT: move $2, $4 +; CHECK-NEXT: jrc $ra +; CHECK-NEXT: $BB0_5: # %sw.bb5 +; CHECK-NEXT: addius5 $4, 3 +; CHECK-NEXT: move $2, $4 +; CHECK-NEXT: $BB0_6: # %for.cond.cleanup +; CHECK-NEXT: jrc $ra +entry: + switch i32 %c, label %sw.epilog [ + i32 4, label %sw.bb5 + i32 1, label %for.cond.cleanup + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] + +sw.bb2: + %add = add nsw i32 %x, 1 + br label %sw.epilog + +sw.bb3: + %add4 = add nsw i32 %x, 2 + br label %sw.epilog + +sw.bb5: + %add6 = add nsw i32 %x, 3 + br label %sw.epilog + +sw.epilog: + %a.0 = phi i32 [ %add6, %sw.bb5 ], [ %add4, %sw.bb3 ], [ %add, %sw.bb2 ], [ %x, %entry ] + br label %for.cond.cleanup + +for.cond.cleanup: + %a.028 = phi i32 [ %a.0, %sw.epilog ], [ 0, %entry ] + ret i32 %a.028 +} diff --git a/test/CodeGen/PowerPC/ppc32-pic-large.ll b/test/CodeGen/PowerPC/ppc32-pic-large.ll index d6e491ea273..272138e5121 100644 --- a/test/CodeGen/PowerPC/ppc32-pic-large.ll +++ b/test/CodeGen/PowerPC/ppc32-pic-large.ll @@ -1,5 +1,9 @@ ; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -relocation-model=pic | FileCheck -check-prefix=LARGE-BSS %s ; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -mattr=+secure-plt -relocation-model=pic | FileCheck -check-prefix=LARGE-SECUREPLT %s +; RUN: llc < %s -mtriple=powerpc-unknown-netbsd -mattr=+secure-plt -relocation-model=pic | FileCheck -check-prefix=LARGE-SECUREPLT %s +; RUN: llc < %s -mtriple=powerpc-unknown-netbsd -relocation-model=pic | FileCheck -check-prefix=LARGE-SECUREPLT %s +; RUN: llc < %s -mtriple=powerpc-unknown-openbsd -mattr=+secure-plt -relocation-model=pic | FileCheck -check-prefix=LARGE-SECUREPLT %s +; RUN: llc < %s -mtriple=powerpc-unknown-openbsd -relocation-model=pic | FileCheck -check-prefix=LARGE-SECUREPLT %s @bar = common global i32 0, align 4 declare i32 @call_foo(i32, ...) diff --git a/test/CodeGen/SPARC/fp128.ll b/test/CodeGen/SPARC/fp128.ll index 535f0ef60c4..21a9cdf77e0 100644 --- a/test/CodeGen/SPARC/fp128.ll +++ b/test/CodeGen/SPARC/fp128.ll @@ -53,6 +53,29 @@ entry: ret void } +; CHECK-LABEL: f128_spill_large: +; CHECK: sethi 4, %g1 +; CHECK: sethi 4, %g1 +; CHECK-NEXT: add %g1, %sp, %g1 +; CHECK-NEXT: std %f{{.+}}, [%g1] +; CHECK: sethi 4, %g1 +; CHECK-NEXT: add %g1, %sp, %g1 +; CHECK-NEXT: std %f{{.+}}, [%g1+8] +; CHECK: sethi 4, %g1 +; CHECK-NEXT: add %g1, %sp, %g1 +; CHECK-NEXT: ldd [%g1], %f{{.+}} +; CHECK: sethi 4, %g1 +; CHECK-NEXT: add %g1, %sp, %g1 +; CHECK-NEXT: ldd [%g1+8], %f{{.+}} + +define void @f128_spill_large(<251 x fp128>* noalias sret %scalar.result, <251 x fp128>* byval %a) { +entry: + %0 = load <251 x fp128>, <251 x fp128>* %a, align 8 + call void asm sideeffect "", "~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"() + store <251 x fp128> %0, <251 x fp128>* %scalar.result, align 8 + ret void +} + ; CHECK-LABEL: f128_compare: ; HARD: fcmpq ; HARD-NEXT: nop diff --git a/test/CodeGen/WebAssembly/varargs.ll b/test/CodeGen/WebAssembly/varargs.ll index 1a73716c2a6..5a8df4cd2fe 100644 --- a/test/CodeGen/WebAssembly/varargs.ll +++ b/test/CodeGen/WebAssembly/varargs.ll @@ -163,6 +163,32 @@ define void @nonlegal_fixed(fp128 %x, ...) nounwind { ret void } +; Test that an fp128 argument is properly aligned and allocated +; within a vararg buffer. + +; CHECK-LABEL: call_fp128_alignment: +; CHECK: global.get $push7=, __stack_pointer +; CHECK-NEXT: i32.const $push8=, 32 +; CHECK-NEXT: i32.sub $push12=, $pop7, $pop8 +; CHECK-NEXT: local.tee $push11=, $1=, $pop12 +; CHECK-NEXT: global.set __stack_pointer@GLOBAL, $pop11 +; CHECK-NEXT: i32.const $push0=, 24 +; CHECK-NEXT: i32.add $push1=, $1, $pop0 +; CHECK-NEXT: i64.const $push2=, -9223372036854775808 +; CHECK-NEXT: i64.store 0($pop1), $pop2 +; CHECK-NEXT: i32.const $push3=, 16 +; CHECK-NEXT: i32.add $push4=, $1, $pop3 +; CHECK-NEXT: i64.const $push5=, 1 +; CHECK-NEXT: i64.store 0($pop4), $pop5 +; CHECK-NEXT: i32.const $push6=, 7 +; CHECK-NEXT: i32.store 0($1), $pop6 +; CHECK-NEXT: call callee@FUNCTION, $1 +define void @call_fp128_alignment(i8* %p) { +entry: + call void (...) @callee(i8 7, fp128 0xL00000000000000018000000000000000) + ret void +} + declare void @llvm.va_start(i8*) declare void @llvm.va_end(i8*) declare void @llvm.va_copy(i8*, i8*) diff --git a/test/CodeGen/X86/PR40322.ll b/test/CodeGen/X86/PR40322.ll new file mode 100644 index 00000000000..22bf1822c65 --- /dev/null +++ b/test/CodeGen/X86/PR40322.ll @@ -0,0 +1,164 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-windows-gnu | FileCheck %s --check-prefix=CHECK-MINGW-X86 + +%struct.as = type { i32* } + +@_ZZ2amiE2au = internal unnamed_addr global %struct.as zeroinitializer, align 4 +@_ZGVZ2amiE2au = internal global i64 0, align 8 +@_ZTIi = external constant i8* + +define void @_Z2ami(i32) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-MINGW-X86-LABEL: _Z2ami: +; CHECK-MINGW-X86: # %bb.0: # %entry +; CHECK-MINGW-X86-NEXT: pushl %edi +; CHECK-MINGW-X86-NEXT: .cfi_def_cfa_offset 8 +; CHECK-MINGW-X86-NEXT: pushl %esi +; CHECK-MINGW-X86-NEXT: .cfi_def_cfa_offset 12 +; CHECK-MINGW-X86-NEXT: .cfi_offset %esi, -12 +; CHECK-MINGW-X86-NEXT: .cfi_offset %edi, -8 +; CHECK-MINGW-X86-NEXT: movb __ZGVZ2amiE2au, %al +; CHECK-MINGW-X86-NEXT: testb %al, %al +; CHECK-MINGW-X86-NEXT: jne LBB0_4 +; CHECK-MINGW-X86-NEXT: # %bb.1: # %init.check +; CHECK-MINGW-X86-NEXT: .cfi_escape 0x2e, 0x04 +; CHECK-MINGW-X86-NEXT: pushl $__ZGVZ2amiE2au +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: calll ___cxa_guard_acquire +; CHECK-MINGW-X86-NEXT: addl $4, %esp +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK-MINGW-X86-NEXT: testl %eax, %eax +; CHECK-MINGW-X86-NEXT: je LBB0_4 +; CHECK-MINGW-X86-NEXT: # %bb.2: # %init +; CHECK-MINGW-X86-NEXT: Ltmp0: +; CHECK-MINGW-X86-NEXT: .cfi_escape 0x2e, 0x04 +; CHECK-MINGW-X86-NEXT: pushl $4 +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: calll __Znwj +; CHECK-MINGW-X86-NEXT: addl $4, %esp +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK-MINGW-X86-NEXT: Ltmp1: +; CHECK-MINGW-X86-NEXT: # %bb.3: # %invoke.cont +; CHECK-MINGW-X86-NEXT: movl %eax, __ZZ2amiE2au +; CHECK-MINGW-X86-NEXT: .cfi_escape 0x2e, 0x04 +; CHECK-MINGW-X86-NEXT: pushl $__ZGVZ2amiE2au +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: calll ___cxa_guard_release +; CHECK-MINGW-X86-NEXT: addl $4, %esp +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK-MINGW-X86-NEXT: LBB0_4: # %init.end +; CHECK-MINGW-X86-NEXT: .cfi_escape 0x2e, 0x04 +; CHECK-MINGW-X86-NEXT: pushl $4 +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: calll __Znwj +; CHECK-MINGW-X86-NEXT: addl $4, %esp +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK-MINGW-X86-NEXT: movl %eax, %esi +; CHECK-MINGW-X86-NEXT: .cfi_escape 0x2e, 0x04 +; CHECK-MINGW-X86-NEXT: pushl $4 +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: calll ___cxa_allocate_exception +; CHECK-MINGW-X86-NEXT: addl $4, %esp +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK-MINGW-X86-NEXT: movl $0, (%eax) +; CHECK-MINGW-X86-NEXT: Ltmp3: +; CHECK-MINGW-X86-NEXT: .cfi_escape 0x2e, 0x0c +; CHECK-MINGW-X86-NEXT: movl .refptr.__ZTIi, %ecx +; CHECK-MINGW-X86-NEXT: pushl $0 +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: pushl %ecx +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: pushl %eax +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: calll ___cxa_throw +; CHECK-MINGW-X86-NEXT: addl $12, %esp +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset -12 +; CHECK-MINGW-X86-NEXT: Ltmp4: +; CHECK-MINGW-X86-NEXT: # %bb.8: # %unreachable +; CHECK-MINGW-X86-NEXT: LBB0_5: # %lpad +; CHECK-MINGW-X86-NEXT: Ltmp2: +; CHECK-MINGW-X86-NEXT: movl %eax, %edi +; CHECK-MINGW-X86-NEXT: .cfi_escape 0x2e, 0x04 +; CHECK-MINGW-X86-NEXT: pushl $__ZGVZ2amiE2au +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: calll ___cxa_guard_abort +; CHECK-MINGW-X86-NEXT: jmp LBB0_7 +; CHECK-MINGW-X86-NEXT: LBB0_6: # %lpad1 +; CHECK-MINGW-X86-NEXT: .cfi_def_cfa_offset 12 +; CHECK-MINGW-X86-NEXT: Ltmp5: +; CHECK-MINGW-X86-NEXT: movl %eax, %edi +; CHECK-MINGW-X86-NEXT: .cfi_escape 0x2e, 0x04 +; CHECK-MINGW-X86-NEXT: pushl %esi +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: calll __ZdlPv +; CHECK-MINGW-X86-NEXT: LBB0_7: # %eh.resume +; CHECK-MINGW-X86-NEXT: addl $4, %esp +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK-MINGW-X86-NEXT: .cfi_escape 0x2e, 0x04 +; CHECK-MINGW-X86-NEXT: pushl %edi +; CHECK-MINGW-X86-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-MINGW-X86-NEXT: calll __Unwind_Resume +; CHECK-MINGW-X86-NEXT: Lfunc_end0: +entry: + %1 = load atomic i8, i8* bitcast (i64* @_ZGVZ2amiE2au to i8*) acquire, align 8 + %guard.uninitialized = icmp eq i8 %1, 0 + br i1 %guard.uninitialized, label %init.check, label %init.end + +init.check: ; preds = %entry + %2 = tail call i32 @__cxa_guard_acquire(i64* nonnull @_ZGVZ2amiE2au) + %tobool = icmp eq i32 %2, 0 + br i1 %tobool, label %init.end, label %init + +init: ; preds = %init.check + %call.i3 = invoke i8* @_Znwj(i32 4) + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %init + store i8* %call.i3, i8** bitcast (%struct.as* @_ZZ2amiE2au to i8**), align 4 + tail call void @__cxa_guard_release(i64* nonnull @_ZGVZ2amiE2au) + br label %init.end + +init.end: ; preds = %init.check, %invoke.cont, %entry + %call.i = tail call i8* @_Znwj(i32 4) + %exception = tail call i8* @__cxa_allocate_exception(i32 4) + %3 = bitcast i8* %exception to i32* + store i32 0, i32* %3, align 16 + invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) + to label %unreachable unwind label %lpad1 + +lpad: ; preds = %init + %4 = landingpad { i8*, i32 } + cleanup + %5 = extractvalue { i8*, i32 } %4, 0 + %6 = extractvalue { i8*, i32 } %4, 1 + tail call void @__cxa_guard_abort(i64* nonnull @_ZGVZ2amiE2au) #1 + br label %eh.resume + +lpad1: ; preds = %init.end + %7 = landingpad { i8*, i32 } + cleanup + %8 = extractvalue { i8*, i32 } %7, 0 + %9 = extractvalue { i8*, i32 } %7, 1 + tail call void @_ZdlPv(i8* nonnull %call.i) + br label %eh.resume + +eh.resume: ; preds = %lpad1, %lpad + %exn.slot.0 = phi i8* [ %8, %lpad1 ], [ %5, %lpad ] + %ehselector.slot.0 = phi i32 [ %9, %lpad1 ], [ %6, %lpad ] + %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0 + %lpad.val2 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1 + resume { i8*, i32 } %lpad.val2 + +unreachable: ; preds = %init.end + unreachable +} + +declare i32 @__cxa_guard_acquire(i64*) +declare i32 @__gxx_personality_v0(...) +declare void @__cxa_guard_abort(i64*) +declare void @__cxa_guard_release(i64*) +declare i8* @__cxa_allocate_exception(i32) +declare void @__cxa_throw(i8*, i8*, i8*) +declare noalias nonnull i8* @_Znwj(i32) +declare i8* @__cxa_begin_catch(i8*) +declare void @__cxa_end_catch() +declare void @_ZdlPv(i8*) diff --git a/test/CodeGen/X86/fast-isel-nontemporal.ll b/test/CodeGen/X86/fast-isel-nontemporal.ll index db1ebfe6060..37e380b2b48 100644 --- a/test/CodeGen/X86/fast-isel-nontemporal.ll +++ b/test/CodeGen/X86/fast-isel-nontemporal.ll @@ -300,10 +300,20 @@ entry: } define <16 x i8> @test_load_nt16xi8(<16 x i8>* nocapture %ptr) { -; SSE-LABEL: test_load_nt16xi8: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movntdqa (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_load_nt16xi8: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE4A-LABEL: test_load_nt16xi8: +; SSE4A: # %bb.0: # %entry +; SSE4A-NEXT: movdqa (%rdi), %xmm0 +; SSE4A-NEXT: retq +; +; SSE41-LABEL: test_load_nt16xi8: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movntdqa (%rdi), %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_load_nt16xi8: ; AVX: # %bb.0: # %entry @@ -320,10 +330,20 @@ entry: } define <8 x i16> @test_load_nt8xi16(<8 x i16>* nocapture %ptr) { -; SSE-LABEL: test_load_nt8xi16: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movntdqa (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_load_nt8xi16: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE4A-LABEL: test_load_nt8xi16: +; SSE4A: # %bb.0: # %entry +; SSE4A-NEXT: movdqa (%rdi), %xmm0 +; SSE4A-NEXT: retq +; +; SSE41-LABEL: test_load_nt8xi16: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movntdqa (%rdi), %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_load_nt8xi16: ; AVX: # %bb.0: # %entry @@ -340,10 +360,20 @@ entry: } define <4 x i32> @test_load_nt4xi32(<4 x i32>* nocapture %ptr) { -; SSE-LABEL: test_load_nt4xi32: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movntdqa (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_load_nt4xi32: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE4A-LABEL: test_load_nt4xi32: +; SSE4A: # %bb.0: # %entry +; SSE4A-NEXT: movdqa (%rdi), %xmm0 +; SSE4A-NEXT: retq +; +; SSE41-LABEL: test_load_nt4xi32: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movntdqa (%rdi), %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_load_nt4xi32: ; AVX: # %bb.0: # %entry @@ -360,10 +390,20 @@ entry: } define <2 x i64> @test_load_nt2xi64(<2 x i64>* nocapture %ptr) { -; SSE-LABEL: test_load_nt2xi64: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movntdqa (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_load_nt2xi64: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE4A-LABEL: test_load_nt2xi64: +; SSE4A: # %bb.0: # %entry +; SSE4A-NEXT: movdqa (%rdi), %xmm0 +; SSE4A-NEXT: retq +; +; SSE41-LABEL: test_load_nt2xi64: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movntdqa (%rdi), %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_load_nt2xi64: ; AVX: # %bb.0: # %entry diff --git a/test/CodeGen/X86/regalloc-copy-hints.mir b/test/CodeGen/X86/regalloc-copy-hints.mir new file mode 100644 index 00000000000..6287066e64f --- /dev/null +++ b/test/CodeGen/X86/regalloc-copy-hints.mir @@ -0,0 +1,805 @@ +# RUN: llc -mtriple=i386-unknown-unknown -mcpu=i486 %s -o - -run-pass greedy \ +# RUN: -debug-only=regalloc 2>&1 | FileCheck %s +# REQUIRES: asserts + +--- | + %0 = type { %1 } + %1 = type { %2, %23, %23*, %27*, %28*, %29, %33*, %34, %42, i8, i32, i32, i32 } + %2 = type { %3, %6, %14, %14, i8, i8*, i8*, %16 } + %3 = type { i32 (...)**, %4*, %5* } + %4 = type { i32 (...)**, %3* } + %5 = type { i32 (...)** } + %6 = type { %7 } + %7 = type { %8, i32, %12 } + %8 = type { %9**, %9**, %9**, %10 } + %9 = type { i32, i32, i32, i8* } + %10 = type { %11 } + %11 = type { %9** } + %12 = type { %13 } + %13 = type { i32 } + %14 = type { i32, %15* } + %15 = type { i32, i32, i8* } + %16 = type { %17 } + %17 = type { %18*, %20, %22 } + %18 = type { %19* } + %19 = type <{ %18, %19*, %18*, i8, [3 x i8] }> + %20 = type { %21 } + %21 = type { %18 } + %22 = type { %13 } + %23 = type { %24 } + %24 = type { %18*, %25, %26 } + %25 = type { %21 } + %26 = type { %13 } + %27 = type { i32 (...)** } + %28 = type { i32 (...)** } + %29 = type { %30 } + %30 = type { %18*, %31, %32 } + %31 = type { %21 } + %32 = type { %13 } + %33 = type { i32 (...)** } + %34 = type { %35 } + %35 = type { %36 } + %36 = type { %37, i32, %41 } + %37 = type { %38**, %38**, %38**, %39 } + %38 = type { %42, i32 } + %39 = type { %40 } + %40 = type { %38** } + %41 = type { %13 } + %42 = type { %43 } + %43 = type { %18*, %44, %45 } + %44 = type { %21 } + %45 = type { %13 } + %46 = type { %47, %48 } + %47 = type <{ %18, %19*, %18*, i8 }> + %48 = type { %49 } + %49 = type { i32, %50 } + %50 = type { { i32, i32 }, { i32, i32 }, { i32, i32 }, { i32, i32 }, { i32, i32 }, { i32, i32 } } + + define void @fun(%0* %arg) local_unnamed_addr #0 align 2 personality i32 (...)* @__gxx_personality_v0 { + bb: + %tmp = getelementptr inbounds %0, %0* %arg, i32 0, i32 0, i32 1 + %tmp1 = getelementptr inbounds %0, %0* %arg, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0 + br i1 undef, label %bb5, label %bb6 + + bb5: ; preds = %bb + unreachable + + bb6: ; preds = %bb + %tmp8 = getelementptr inbounds %0, %0* %arg, i32 0, i32 0, i32 8, i32 0, i32 1, i32 0, i32 0 + br i1 undef, label %bb10, label %bb9 + + bb9: ; preds = %bb6 + unreachable + + bb10: ; preds = %bb6 + store %18* %tmp8, %18** undef + br i1 undef, label %bb14, label %bb13 + + bb13: ; preds = %bb10 + unreachable + + bb14: ; preds = %bb10 + br i1 undef, label %bb17, label %bb18 + + bb17: ; preds = %bb14 + unreachable + + bb18: ; preds = %bb14 + br i1 undef, label %bb20, label %bb19 + + bb19: ; preds = %bb18 + unreachable + + bb20: ; preds = %bb18 + br i1 undef, label %bb25, label %bb24 + + bb24: ; preds = %bb20 + unreachable + + bb25: ; preds = %bb20 + br i1 undef, label %bb29, label %bb30 + + bb29: ; preds = %bb25 + unreachable + + bb30: ; preds = %bb25 + br i1 undef, label %bb38, label %bb31 + + bb31: ; preds = %bb30 + %tmp32 = getelementptr inbounds %0, %0* %arg, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0 + br i1 undef, label %bb34, label %bb35 + + bb34: ; preds = %bb31 + unreachable + + bb35: ; preds = %bb31 + br i1 undef, label %bb40, label %bb36 + + bb36: ; preds = %bb35 + unreachable + + bb38: ; preds = %bb30 + %tmp391 = bitcast %18* %tmp1 to %19** + br label %bb40 + + bb40: ; preds = %bb35, %bb38 + %tmp41 = phi %18* [ %tmp1, %bb38 ], [ null, %bb35 ] + %tmp42 = phi %19** [ %tmp391, %bb38 ], [ %tmp32, %bb35 ] + br i1 undef, label %bb43, label %bb48 + + bb43: ; preds = %bb40 + %tmp44 = tail call i8* @_Znwj() + store %18* %tmp41, %18** undef + %tmp46 = bitcast %19** %tmp42 to i8** + store i8* %tmp44, i8** %tmp46 + %0 = bitcast i8* %tmp44 to %46* + tail call void @_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_() + br label %bb48 + + bb48: ; preds = %bb43, %bb40 + %tmp49 = phi %46* [ %0, %bb43 ], [ undef, %bb40 ] + %tmp50 = getelementptr inbounds %46, %46* %tmp49, i32 0, i32 1, i32 0, i32 1, i32 4, i32 0 + store i32 ptrtoint (i1 (%0*)* @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private15_preEnd__authorEv to i32), i32* %tmp50 + br i1 undef, label %bb52, label %bb53 + + bb52: ; preds = %bb48 + unreachable + + bb53: ; preds = %bb48 + br i1 undef, label %bb55, label %bb54 + + bb54: ; preds = %bb53 + unreachable + + bb55: ; preds = %bb53 + br i1 undef, label %bb59, label %bb58 + + bb58: ; preds = %bb55 + unreachable + + bb59: ; preds = %bb55 + br i1 undef, label %bb62, label %bb61 + + bb61: ; preds = %bb59 + unreachable + + bb62: ; preds = %bb59 + br i1 undef, label %bb64, label %bb65 + + bb64: ; preds = %bb62 + unreachable + + bb65: ; preds = %bb62 + %tmp66 = icmp eq %46* null, null + br i1 %tmp66, label %bb72, label %bb67 + + bb67: ; preds = %bb65 + %tmp68 = getelementptr inbounds %0, %0* %arg, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0 + br i1 undef, label %bb70, label %bb74 + + bb70: ; preds = %bb67 + unreachable + + bb72: ; preds = %bb65 + %tmp732 = bitcast %18* %tmp1 to %19** + br label %bb74 + + bb74: ; preds = %bb67, %bb72 + %tmp75 = phi %18* [ %tmp1, %bb72 ], [ null, %bb67 ] + %tmp76 = phi %19** [ %tmp732, %bb72 ], [ %tmp68, %bb67 ] + %tmp77 = tail call i8* @_Znwj() + store %18* %tmp75, %18** undef + %tmp79 = bitcast %19** %tmp76 to i8** + store i8* %tmp77, i8** %tmp79 + %1 = bitcast i8* %tmp77 to %46* + tail call void @_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_() + %tmp81 = getelementptr inbounds %46, %46* %1, i32 0, i32 1, i32 0, i32 1, i32 2, i32 0 + store i32 ptrtoint (i1 (%0*)* @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private14_end__commentsEv to i32), i32* %tmp81 + store %18* %tmp8, %18** undef + %2 = bitcast %0* %arg to i8* + %sunkaddr = getelementptr i8, i8* %2, i32 140 + %3 = bitcast i8* %sunkaddr to %18** + %tmp85 = load %18*, %18** %3 + %tmp864 = bitcast %18* %tmp85 to %19** + %tmp87 = load %19*, %19** %tmp864 + %tmp88 = icmp eq %19* %tmp87, null + br i1 %tmp88, label %bb90, label %bb89 + + bb89: ; preds = %bb74 + unreachable + + bb90: ; preds = %bb74 + br i1 undef, label %bb94, label %bb92 + + bb92: ; preds = %bb90 + br i1 undef, label %bb96, label %bb97 + + bb94: ; preds = %bb90 + unreachable + + bb96: ; preds = %bb92 + unreachable + + bb97: ; preds = %bb92 + br i1 undef, label %bb101, label %bb102 + + bb101: ; preds = %bb97 + unreachable + + bb102: ; preds = %bb97 + br i1 undef, label %bb104, label %bb103 + + bb103: ; preds = %bb102 + unreachable + + bb104: ; preds = %bb102 + br i1 undef, label %bb109, label %bb108 + + bb108: ; preds = %bb104 + unreachable + + bb109: ; preds = %bb104 + br i1 undef, label %bb111, label %bb112 + + bb111: ; preds = %bb109 + unreachable + + bb112: ; preds = %bb109 + br i1 undef, label %bb118, label %bb117 + + bb117: ; preds = %bb112 + unreachable + + bb118: ; preds = %bb112 + br i1 undef, label %bb120, label %bb121 + + bb120: ; preds = %bb118 + unreachable + + bb121: ; preds = %bb118 + br i1 undef, label %bb124, label %bb125 + + bb124: ; preds = %bb121 + unreachable + + bb125: ; preds = %bb121 + %4 = bitcast %18* %tmp1 to %46** + %tmp126 = load %46*, %46** %4 + %tmp127 = icmp eq %46* %tmp126, null + br i1 %tmp127, label %bb135, label %bb128 + + bb128: ; preds = %bb125 + br label %bb129 + + bb129: ; preds = %bb131, %bb128 + %tmp130 = icmp ugt i32 undef, 95406324 + br i1 %tmp130, label %bb131, label %bb133 + + bb131: ; preds = %bb129 + br label %bb129 + + bb133: ; preds = %bb129 + unreachable + + bb135: ; preds = %bb125 + br i1 undef, label %bb137, label %bb138 + + bb137: ; preds = %bb135 + unreachable + + bb138: ; preds = %bb135 + unreachable + } + + declare zeroext i1 @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private15_preEnd__authorEv(%0*) #0 + + declare zeroext i1 @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private14_end__commentsEv(%0*) #0 align 2 + + declare i32 @__gxx_personality_v0(...) #0 + + declare noalias nonnull i8* @_Znwj() local_unnamed_addr #0 + + declare void @_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_() local_unnamed_addr #0 + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #1 + + attributes #0 = { "target-cpu"="i486" } + attributes #1 = { nounwind } + +... +--- +# A physreg should always only be hinted once per getRegAllocationHints() query. +# CHECK: hints: $ebx $edi +# CHECK-NOT: hints: $ebx $edi $ebx $edi +name: fun +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gr32 } + - { id: 1, class: gr32 } + - { id: 2, class: gr32 } + - { id: 3, class: gr32 } + - { id: 4, class: gr32 } + - { id: 5, class: gr32 } + - { id: 6, class: gr32 } + - { id: 7, class: gr32 } + - { id: 8, class: gr32 } + - { id: 9, class: gr32 } + - { id: 10, class: gr32 } + - { id: 11, class: gr32 } + - { id: 12, class: gr32 } + - { id: 13, class: gr32_abcd } + - { id: 14, class: gr8 } + - { id: 15, class: gr32_abcd } + - { id: 16, class: gr8 } + - { id: 17, class: gr32 } + - { id: 18, class: gr32_abcd } + - { id: 19, class: gr8 } + - { id: 20, class: gr32_abcd } + - { id: 21, class: gr8 } + - { id: 22, class: gr32_abcd } + - { id: 23, class: gr8 } + - { id: 24, class: gr32_abcd } + - { id: 25, class: gr8 } + - { id: 26, class: gr32_abcd } + - { id: 27, class: gr8 } + - { id: 28, class: gr32_abcd } + - { id: 29, class: gr8 } + - { id: 30, class: gr32_abcd } + - { id: 31, class: gr8 } + - { id: 32, class: gr32_abcd } + - { id: 33, class: gr8 } + - { id: 34, class: gr32 } + - { id: 35, class: gr32_abcd } + - { id: 36, class: gr8 } + - { id: 37, class: gr32 } + - { id: 38, class: gr32 } + - { id: 39, class: gr32_abcd } + - { id: 40, class: gr8 } + - { id: 41, class: gr32_abcd } + - { id: 42, class: gr8 } + - { id: 43, class: gr32_abcd } + - { id: 44, class: gr8 } + - { id: 45, class: gr32_abcd } + - { id: 46, class: gr8 } + - { id: 47, class: gr32_abcd } + - { id: 48, class: gr8 } + - { id: 49, class: gr8 } + - { id: 50, class: gr32_abcd } + - { id: 51, class: gr8 } + - { id: 52, class: gr32 } + - { id: 53, class: gr32 } + - { id: 54, class: gr32 } + - { id: 55, class: gr32 } + - { id: 56, class: gr32_abcd } + - { id: 57, class: gr8 } + - { id: 58, class: gr32_abcd } + - { id: 59, class: gr8 } + - { id: 60, class: gr32_abcd } + - { id: 61, class: gr8 } + - { id: 62, class: gr32_abcd } + - { id: 63, class: gr8 } + - { id: 64, class: gr32_abcd } + - { id: 65, class: gr8 } + - { id: 66, class: gr32_abcd } + - { id: 67, class: gr8 } + - { id: 68, class: gr32_abcd } + - { id: 69, class: gr8 } + - { id: 70, class: gr32_abcd } + - { id: 71, class: gr8 } + - { id: 72, class: gr32_abcd } + - { id: 73, class: gr8 } + - { id: 74, class: gr32 } + - { id: 75, class: gr32 } + - { id: 76, class: gr32_abcd } + - { id: 77, class: gr8 } + - { id: 78, class: gr32_abcd } + - { id: 79, class: gr32 } + - { id: 80, class: gr32 } + - { id: 81, class: gr32_abcd } + - { id: 82, class: gr32 } +frameInfo: + maxAlignment: 4 + hasCalls: true +fixedStack: + - { id: 0, size: 4, alignment: 4, stack-id: 0, isImmutable: true } +body: | + bb.0.bb: + successors: %bb.1(0x00000001), %bb.2(0x7fffffff) + + %13:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %13.sub_8bit, %13.sub_8bit, implicit-def $eflags + JNE_1 %bb.2, implicit killed $eflags + JMP_1 %bb.1 + + bb.1.bb5: + successors: + + + bb.2.bb6: + successors: %bb.4(0x7fffffff), %bb.3(0x00000001) + + %15:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %15.sub_8bit, %15.sub_8bit, implicit-def $eflags + JNE_1 %bb.4, implicit killed $eflags + JMP_1 %bb.3 + + bb.3.bb9: + successors: + + + bb.4.bb10: + successors: %bb.6(0x7fffffff), %bb.5(0x00000001) + + %12:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0) + %1:gr32 = LEA32r %12, 1, $noreg, 144, $noreg + MOV32mr undef %17:gr32, 1, $noreg, 0, $noreg, %1 :: (store 4 into `%18** undef`) + %18:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %18.sub_8bit, %18.sub_8bit, implicit-def $eflags + JNE_1 %bb.6, implicit killed $eflags + JMP_1 %bb.5 + + bb.5.bb13: + successors: + + + bb.6.bb14: + successors: %bb.7(0x00000001), %bb.8(0x7fffffff) + + %20:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %20.sub_8bit, %20.sub_8bit, implicit-def $eflags + JNE_1 %bb.8, implicit killed $eflags + JMP_1 %bb.7 + + bb.7.bb17: + successors: + + + bb.8.bb18: + successors: %bb.10(0x7fffffff), %bb.9(0x00000001) + + %22:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %22.sub_8bit, %22.sub_8bit, implicit-def $eflags + JNE_1 %bb.10, implicit killed $eflags + JMP_1 %bb.9 + + bb.9.bb19: + successors: + + + bb.10.bb20: + successors: %bb.12(0x7fffffff), %bb.11(0x00000001) + + %24:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %24.sub_8bit, %24.sub_8bit, implicit-def $eflags + JNE_1 %bb.12, implicit killed $eflags + JMP_1 %bb.11 + + bb.11.bb24: + successors: + + + bb.12.bb25: + successors: %bb.13(0x00000001), %bb.14(0x7fffffff) + + %26:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %26.sub_8bit, %26.sub_8bit, implicit-def $eflags + JNE_1 %bb.14, implicit killed $eflags + JMP_1 %bb.13 + + bb.13.bb29: + successors: + + + bb.14.bb30: + %0:gr32 = LEA32r %12, 1, $noreg, 80, $noreg + %28:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %28.sub_8bit, %28.sub_8bit, implicit-def $eflags + JNE_1 %bb.20, implicit killed $eflags + JMP_1 %bb.15 + + bb.15.bb31: + successors: %bb.16(0x00000001), %bb.17(0x7fffffff) + + %78:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %78.sub_8bit, %78.sub_8bit, implicit-def $eflags + JNE_1 %bb.17, implicit killed $eflags + JMP_1 %bb.16 + + bb.16.bb34: + successors: + + + bb.17.bb35: + successors: %bb.18(0x7fffffff), %bb.19(0x00000001) + + TEST8rr %78.sub_8bit, %78.sub_8bit, implicit-def $eflags + JE_1 %bb.19, implicit killed $eflags + + bb.18: + %79:gr32 = LEA32r %12, 1, $noreg, 80, $noreg + JMP_1 %bb.21 + + bb.19.bb36: + successors: + + + bb.20.bb38: + %78:gr32_abcd = COPY %0 + %79:gr32 = COPY %0 + + bb.21.bb40: + successors: %bb.22, %bb.23 + + %35:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %35.sub_8bit, %35.sub_8bit, implicit-def $eflags + %80:gr32 = IMPLICIT_DEF + JNE_1 %bb.23, implicit killed $eflags + JMP_1 %bb.22 + + bb.22.bb43: + ADJCALLSTACKDOWN32 0, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp + CALLpcrel32 @_Znwj, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp, implicit-def $eax + ADJCALLSTACKUP32 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp + %80:gr32 = COPY killed $eax + MOV32mr undef %38:gr32, 1, $noreg, 0, $noreg, %78 :: (store 4 into `%18** undef`) + MOV32mr %79, 1, $noreg, 0, $noreg, %80 :: (store 4 into %ir.tmp46) + ADJCALLSTACKDOWN32 0, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp + CALLpcrel32 @_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp + ADJCALLSTACKUP32 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp + + bb.23.bb48: + successors: %bb.24(0x00000001), %bb.25(0x7fffffff) + + MOV32mi %80, 1, $noreg, 52, $noreg, @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private15_preEnd__authorEv :: (store 4 into %ir.tmp50) + %39:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %39.sub_8bit, %39.sub_8bit, implicit-def $eflags + JNE_1 %bb.25, implicit killed $eflags + JMP_1 %bb.24 + + bb.24.bb52: + successors: + + + bb.25.bb53: + successors: %bb.27(0x7fffffff), %bb.26(0x00000001) + + %41:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %41.sub_8bit, %41.sub_8bit, implicit-def $eflags + JNE_1 %bb.27, implicit killed $eflags + JMP_1 %bb.26 + + bb.26.bb54: + successors: + + + bb.27.bb55: + successors: %bb.29(0x7fffffff), %bb.28(0x00000001) + + %43:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %43.sub_8bit, %43.sub_8bit, implicit-def $eflags + JNE_1 %bb.29, implicit killed $eflags + JMP_1 %bb.28 + + bb.28.bb58: + successors: + + + bb.29.bb59: + successors: %bb.31(0x7fffffff), %bb.30(0x00000001) + + %45:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %45.sub_8bit, %45.sub_8bit, implicit-def $eflags + JNE_1 %bb.31, implicit killed $eflags + JMP_1 %bb.30 + + bb.30.bb61: + successors: + + + bb.31.bb62: + successors: %bb.32(0x00000001), %bb.33(0x7fffffff) + + %47:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %47.sub_8bit, %47.sub_8bit, implicit-def $eflags + JNE_1 %bb.33, implicit killed $eflags + JMP_1 %bb.32 + + bb.32.bb64: + successors: + + + bb.33.bb65: + successors: %bb.37(0x30000000), %bb.34(0x50000000) + + %49:gr8 = MOV8ri 1 + TEST8rr %49, %49, implicit-def $eflags + JNE_1 %bb.37, implicit killed $eflags + JMP_1 %bb.34 + + bb.34.bb67: + successors: %bb.36(0x00000001), %bb.35(0x7fffffff) + + %81:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %81.sub_8bit, %81.sub_8bit, implicit-def $eflags + JE_1 %bb.36, implicit killed $eflags + + bb.35: + %82:gr32 = LEA32r %12, 1, $noreg, 80, $noreg + JMP_1 %bb.38 + + bb.36.bb70: + successors: + + + bb.37.bb72: + %81:gr32_abcd = COPY %0 + %82:gr32 = COPY %0 + + bb.38.bb74: + successors: %bb.40(0x7fffffff), %bb.39(0x00000001) + + ADJCALLSTACKDOWN32 0, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp + CALLpcrel32 @_Znwj, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp, implicit-def $eax + ADJCALLSTACKUP32 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp + %52:gr32 = COPY killed $eax + MOV32mr undef %53:gr32, 1, $noreg, 0, $noreg, %81 :: (store 4 into `%18** undef`) + MOV32mr %82, 1, $noreg, 0, $noreg, %52 :: (store 4 into %ir.tmp79) + ADJCALLSTACKDOWN32 0, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp + CALLpcrel32 @_ZNSt3__127__tree_balance_after_insertIPNS_16__tree_node_baseIPvEEEEvT_S5_, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp + ADJCALLSTACKUP32 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp + MOV32mi %52, 1, $noreg, 36, $noreg, @_ZN15COLLADASaxFWL1429ColladaParserAutoGen14Private14_end__commentsEv :: (store 4 into %ir.tmp81) + MOV32mr undef %54:gr32, 1, $noreg, 0, $noreg, %1 :: (store 4 into `%18** undef`) + %55:gr32 = MOV32rm %12, 1, $noreg, 140, $noreg :: (load 4 from %ir.3) + CMP32mi8 %55, 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load 4 from %ir.tmp864) + JE_1 %bb.40, implicit killed $eflags + JMP_1 %bb.39 + + bb.39.bb89: + successors: + + + bb.40.bb90: + successors: %bb.42(0x00000001), %bb.41(0x7fffffff) + + %56:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %56.sub_8bit, %56.sub_8bit, implicit-def $eflags + JNE_1 %bb.42, implicit killed $eflags + JMP_1 %bb.41 + + bb.41.bb92: + successors: %bb.43(0x00000001), %bb.44(0x7fffffff) + + %58:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %58.sub_8bit, %58.sub_8bit, implicit-def $eflags + JNE_1 %bb.43, implicit killed $eflags + JMP_1 %bb.44 + + bb.42.bb94: + successors: + + + bb.43.bb96: + successors: + + + bb.44.bb97: + successors: %bb.45(0x00000001), %bb.46(0x7fffffff) + + %60:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %60.sub_8bit, %60.sub_8bit, implicit-def $eflags + JNE_1 %bb.46, implicit killed $eflags + JMP_1 %bb.45 + + bb.45.bb101: + successors: + + + bb.46.bb102: + successors: %bb.48(0x7fffffff), %bb.47(0x00000001) + + %62:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %62.sub_8bit, %62.sub_8bit, implicit-def $eflags + JNE_1 %bb.48, implicit killed $eflags + JMP_1 %bb.47 + + bb.47.bb103: + successors: + + + bb.48.bb104: + successors: %bb.50(0x7fffffff), %bb.49(0x00000001) + + %64:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %64.sub_8bit, %64.sub_8bit, implicit-def $eflags + JNE_1 %bb.50, implicit killed $eflags + JMP_1 %bb.49 + + bb.49.bb108: + successors: + + + bb.50.bb109: + successors: %bb.51(0x00000001), %bb.52(0x7fffffff) + + %66:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %66.sub_8bit, %66.sub_8bit, implicit-def $eflags + JNE_1 %bb.52, implicit killed $eflags + JMP_1 %bb.51 + + bb.51.bb111: + successors: + + + bb.52.bb112: + successors: %bb.54(0x7fffffff), %bb.53(0x00000001) + + %68:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %68.sub_8bit, %68.sub_8bit, implicit-def $eflags + JNE_1 %bb.54, implicit killed $eflags + JMP_1 %bb.53 + + bb.53.bb117: + successors: + + + bb.54.bb118: + successors: %bb.55(0x00000001), %bb.56(0x7fffffff) + + %70:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %70.sub_8bit, %70.sub_8bit, implicit-def $eflags + JNE_1 %bb.56, implicit killed $eflags + JMP_1 %bb.55 + + bb.55.bb120: + successors: + + + bb.56.bb121: + successors: %bb.57(0x00000001), %bb.58(0x7fffffff) + + %72:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %72.sub_8bit, %72.sub_8bit, implicit-def $eflags + JNE_1 %bb.58, implicit killed $eflags + JMP_1 %bb.57 + + bb.57.bb124: + successors: + + + bb.58.bb125: + successors: %bb.62(0x00000001), %bb.59(0x7fffffff) + + CMP32mi8 %0, 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load 4 from %ir.4) + JE_1 %bb.62, implicit killed $eflags + JMP_1 %bb.59 + + bb.59.bb128: + + bb.60.bb129: + successors: %bb.60(0x7fffffff), %bb.61(0x00000001) + + CMP32ri undef %75:gr32, 95406325, implicit-def $eflags + JB_1 %bb.61, implicit killed $eflags + JMP_1 %bb.60 + + bb.61.bb133: + successors: + + + bb.62.bb135: + successors: %bb.63, %bb.64 + + %76:gr32_abcd = MOV32r0 implicit-def dead $eflags + TEST8rr %76.sub_8bit, %76.sub_8bit, implicit-def $eflags + JNE_1 %bb.64, implicit killed $eflags + JMP_1 %bb.63 + + bb.63.bb137: + successors: + + + bb.64.bb138: + +... diff --git a/test/MC/PowerPC/ppc64-localentry-symbols.s b/test/MC/PowerPC/ppc64-localentry-symbols.s new file mode 100644 index 00000000000..f1d5c5d0ab1 --- /dev/null +++ b/test/MC/PowerPC/ppc64-localentry-symbols.s @@ -0,0 +1,34 @@ +# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-freebsd13.0 %s -o %t +# RUN: llvm-objdump -t %t | FileCheck %s + +# CHECK: 0000000000000000 gw F .text 00000000 0x60 __impl_foo +# CHECK: 0000000000000000 g F .text 00000000 0x60 foo +# CHECK: 0000000000000000 gw F .text 00000000 0x60 foo@FBSD_1.1 +# CHECK: 0000000000000008 g F .text 00000000 0x60 func +# CHECK: 0000000000000008 gw F .text 00000000 0x60 weak_func + +.text +.abiversion 2 + +.globl foo +.type foo,@function +foo: + nop + nop + .localentry foo, 8 + +.symver __impl_foo, foo@FBSD_1.1 +.weak __impl_foo +.set __impl_foo, foo + +.globl func +# Mimick FreeBSD weak function/reference +.weak weak_func +.equ weak_func, func + +.p2align 2 +.type func,@function +func: + nop + nop + .localentry func, 8 diff --git a/test/MC/WebAssembly/null-output.s b/test/MC/WebAssembly/null-output.s new file mode 100644 index 00000000000..a25d095e0cb --- /dev/null +++ b/test/MC/WebAssembly/null-output.s @@ -0,0 +1,10 @@ +# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj -o /dev/null < %s + + .text + .section .text.main,"",@ + .type main,@function +main: + .functype main (i32, i32) -> (i32) + end_function +.Lfunc_end0: + .size main, .Lfunc_end0-main diff --git a/test/tools/llvm-dlltool/coff-weak-exports.def b/test/tools/llvm-dlltool/coff-weak-exports.def index dbc59be8ae1..60f835233a5 100644 --- a/test/tools/llvm-dlltool/coff-weak-exports.def +++ b/test/tools/llvm-dlltool/coff-weak-exports.def @@ -1,5 +1,6 @@ ; RUN: llvm-dlltool -m i386:x86-64 --input-def %s --output-lib %t.a ; RUN: llvm-nm %t.a | FileCheck %s +; RUN: llvm-readobj %t.a | FileCheck -check-prefix=ARCH %s LIBRARY test.dll EXPORTS @@ -26,3 +27,5 @@ ImpLibName3 = kernel32.Sleep ; CHECK-NEXT: W __imp_ImpLibName2 ; CHECK: T ImpLibName3 ; CHECK-NEXT: T __imp_ImpLibName3 + +; ARCH-NOT: unknown arch diff --git a/test/tools/llvm-objdump/AMDGPU/source-lines.ll b/test/tools/llvm-objdump/AMDGPU/source-lines.ll index 748f04754e4..4a4203d2a52 100644 --- a/test/tools/llvm-objdump/AMDGPU/source-lines.ll +++ b/test/tools/llvm-objdump/AMDGPU/source-lines.ll @@ -12,7 +12,7 @@ ; LINE: v_mov_b32_e32 v{{[0-9]+}}, 0x888 ; LINE: ; {{.*}}source-lines.cl:3 ; LINE: ; {{.*}}source-lines.cl:4 -; LINE: v_add_u32_e32 +; LINE: v_add_u32_e64 ; LINE: ; {{.*}}source-lines.cl:5 ; LINE: flat_store_dword ; Epilogue. @@ -28,7 +28,7 @@ ; SOURCE: v_mov_b32_e32 v{{[0-9]+}}, 0x888 ; SOURCE: ; int var1 = 0x888; ; SOURCE: ; int var2 = var0 + var1; -; SOURCE: v_add_u32_e32 +; SOURCE: v_add_u32_e64 ; SOURCE: ; *Out = var2; ; SOURCE: flat_store_dword ; Epilogue. diff --git a/test/tools/llvm-objdump/PowerPC/branch-offset.s b/test/tools/llvm-objdump/PowerPC/branch-offset.s new file mode 100644 index 00000000000..b0b3f05f9cd --- /dev/null +++ b/test/tools/llvm-objdump/PowerPC/branch-offset.s @@ -0,0 +1,43 @@ +# RUN: llvm-mc -triple=powerpc64le-unknown-linux -filetype=obj %s -o %t.o +# RUN: llvm-objdump -d %t.o | FileCheck %s + +# RUN: llvm-mc -triple=powerpc64-unknown-linux -filetype=obj %s -o %t.o +# RUN: llvm-objdump -d %t.o | FileCheck %s + +# RUN: llvm-mc -triple=powerpc-unknown-linux -filetype=obj %s -o %t.o +# RUN: llvm-objdump -d %t.o | FileCheck %s + +# CHECK: 0000000000000000 callee_back: +# CHECK: 18: {{.*}} bl .-24 +# CHECK: 20: {{.*}} bl .+16 +# CHECK: 0000000000000030 callee_forward: + + .text + .global caller + .type caller,@function + .type callee_forward,@function + .type callee_back,@function + + .p2align 4 +callee_back: + li 3, 55 + blr + + .p2align 4 +caller: +.Lgep: + addis 2, 12, .TOC.-.Lgep@ha + addi 2, 2, .TOC.-.Lgep@l +.Llep: + .localentry caller, .Llep-.Lgep + bl callee_back + mr 31, 3 + bl callee_forward + add 3, 3, 31 + blr + + .p2align 4 +callee_forward: + li 3, 66 + blr + diff --git a/test/tools/llvm-objdump/PowerPC/lit.local.cfg b/test/tools/llvm-objdump/PowerPC/lit.local.cfg new file mode 100644 index 00000000000..b77510721e1 --- /dev/null +++ b/test/tools/llvm-objdump/PowerPC/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'PowerPC' in config.root.targets: + config.unsupported = True diff --git a/test/tools/llvm-objdump/eh_frame-coff.test b/test/tools/llvm-objdump/eh_frame-coff.test index 74eceeec4f3..cb481939dcc 100644 --- a/test/tools/llvm-objdump/eh_frame-coff.test +++ b/test/tools/llvm-objdump/eh_frame-coff.test @@ -8,7 +8,7 @@ # CHECK: Code alignment factor: 1 # CHECK: Data alignment factor: -4 # CHECK: Return address column: 8 -# CHECK: Personality Address: 004025d7 +# CHECK: Personality Address: 00000000004025d7 # CHECK: Augmentation data: 00 D7 25 40 00 00 00 # CHECK: DW_CFA_def_cfa: reg4 +4 @@ -17,7 +17,7 @@ # CHECK: DW_CFA_nop: # CHECK: 00000020 0000001c 00000024 FDE cie=00000024 pc=00401410...00401488 -# CHECK: LSDA Address: 00406000 +# CHECK: LSDA Address: 0000000000406000 # CHECK: DW_CFA_advance_loc: 1 # CHECK: DW_CFA_def_cfa_offset: +8 # CHECK: DW_CFA_offset: reg5 -8 diff --git a/test/tools/llvm-objdump/elf-symbol-visibility.test b/test/tools/llvm-objdump/elf-symbol-visibility.test new file mode 100644 index 00000000000..da7f6d28516 --- /dev/null +++ b/test/tools/llvm-objdump/elf-symbol-visibility.test @@ -0,0 +1,37 @@ +# RUN: yaml2obj %s -o %t +# RUN: llvm-objdump --syms %t | FileCheck %s + +# CHECK: SYMBOL TABLE: +# CHECK-NEXT: .text 00000000 default +# CHECK-NEXT: .text 00000000 .internal internal +# CHECK-NEXT: .text 00000000 .hidden hidden +# CHECK-NEXT: .text 00000000 .protected protected +# CHECK-NEXT: .text 00000000 0x20 mips_pic + +!ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_MIPS + Flags: [ EF_MIPS_ABI_O32, EF_MIPS_ARCH_32 ] +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] +Symbols: + Local: + - Name: default + Section: .text + - Name: internal + Visibility: STV_INTERNAL + Section: .text + - Name: hidden + Visibility: STV_HIDDEN + Section: .text + - Name: protected + Visibility: STV_PROTECTED + Section: .text + - Name: mips_pic + Other: [ STO_MIPS_PIC ] + Section: .text diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp index ba8d3c5b8d5..9bd4528ef7f 100644 --- a/tools/llvm-objdump/llvm-objdump.cpp +++ b/tools/llvm-objdump/llvm-objdump.cpp @@ -2087,20 +2087,38 @@ void llvm::printSymbolTable(const ObjectFile *O, StringRef ArchiveName, outs() << SectionName; } - outs() << '\t'; if (Common || isa(O)) { uint64_t Val = Common ? Symbol.getAlignment() : ELFSymbolRef(Symbol).getSize(); - outs() << format("\t %08" PRIx64 " ", Val); + outs() << format("\t%08" PRIx64, Val); } - if (Hidden) - outs() << ".hidden "; + if (isa(O)) { + uint8_t Other = ELFSymbolRef(Symbol).getOther(); + switch (Other) { + case ELF::STV_DEFAULT: + break; + case ELF::STV_INTERNAL: + outs() << " .internal"; + break; + case ELF::STV_HIDDEN: + outs() << " .hidden"; + break; + case ELF::STV_PROTECTED: + outs() << " .protected"; + break; + default: + outs() << format(" 0x%02x", Other); + break; + } + } else if (Hidden) { + outs() << " .hidden"; + } if (Demangle) - outs() << demangle(Name) << '\n'; + outs() << ' ' << demangle(Name) << '\n'; else - outs() << Name << '\n'; + outs() << ' ' << Name << '\n'; } } diff --git a/utils/git-svn/git-llvm b/utils/git-svn/git-llvm index 53c0b24ae2c..bf234301f13 100755 --- a/utils/git-svn/git-llvm +++ b/utils/git-svn/git-llvm @@ -42,7 +42,7 @@ else: # It's *almost* a straightforward mapping from the monorepo to svn... GIT_TO_SVN_DIR = { - d: (d + '/trunk') + d: (d + '/branches/release_80') for d in [ 'clang-tools-extra', 'compiler-rt', @@ -63,8 +63,8 @@ GIT_TO_SVN_DIR = { 'pstl', ] } -GIT_TO_SVN_DIR.update({'clang': 'cfe/trunk'}) -GIT_TO_SVN_DIR.update({'': 'monorepo-root/trunk'}) +GIT_TO_SVN_DIR.update({'clang': 'cfe/branches/release_80'}) +GIT_TO_SVN_DIR.update({'': 'monorepo-root/branches/release_80'}) VERBOSE = False QUIET = False diff --git a/utils/lit/lit/__init__.py b/utils/lit/lit/__init__.py index 0d849d16f07..023bed81707 100644 --- a/utils/lit/lit/__init__.py +++ b/utils/lit/lit/__init__.py @@ -2,7 +2,7 @@ __author__ = 'Daniel Dunbar' __email__ = 'daniel@minormatter.com' -__versioninfo__ = (0, 8, 0) +__versioninfo__ = (0, 8, 1) __version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev' __all__ = [] diff --git a/utils/release/merge-request.sh b/utils/release/merge-request.sh index 333b9043af3..c9763ee3cec 100755 --- a/utils/release/merge-request.sh +++ b/utils/release/merge-request.sh @@ -101,6 +101,9 @@ case $stable_version in 7.0) release_metabug="39106" ;; + 8.0) + release_metabug="41221" + ;; *) echo "error: invalid stable version" exit 1 From 70198750b9b77147a123f948252a51d04d5c6b28 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Tue, 11 Jun 2019 18:16:50 +0000 Subject: [PATCH 2/7] Vendor import of clang release_80 branch r363030: https://llvm.org/svn/llvm-project/cfe/branches/release_80@363030 --- lib/Basic/Version.cpp | 2 +- lib/CodeGen/CGStmtOpenMP.cpp | 5 ++-- lib/Driver/ToolChains/Clang.cpp | 12 ++++++--- lib/Driver/ToolChains/Linux.cpp | 33 ++++++++++++++---------- lib/Sema/SemaOpenMP.cpp | 3 +-- test/Driver/cl-options.c | 2 +- test/Driver/instrprof-ld.c | 14 ++++++++++ test/Driver/linux-ld.c | 10 +++++++ test/OpenMP/ordered_doacross_codegen.cpp | 13 +++++++++- 9 files changed, 70 insertions(+), 24 deletions(-) diff --git a/lib/Basic/Version.cpp b/lib/Basic/Version.cpp index a15c60e0f55..1d594b97418 100644 --- a/lib/Basic/Version.cpp +++ b/lib/Basic/Version.cpp @@ -36,7 +36,7 @@ std::string getClangRepositoryPath() { // If the SVN_REPOSITORY is empty, try to use the SVN keyword. This helps us // pick up a tag in an SVN export, for example. - StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_800/final/lib/Basic/Version.cpp $"); + StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/branches/release_80/lib/Basic/Version.cpp $"); if (URL.empty()) { URL = SVNRepository.slice(SVNRepository.find(':'), SVNRepository.find("/lib/Basic")); diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index eb1304d8934..44dc1cdee0b 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -1518,8 +1518,9 @@ void CodeGenFunction::EmitOMPPrivateLoopCounters( I < E; ++I) { const auto *DRE = cast(C->getLoopCounter(I)); const auto *VD = cast(DRE->getDecl()); - // Override only those variables that are really emitted already. - if (LocalDeclMap.count(VD)) { + // Override only those variables that can be captured to avoid re-emission + // of the variables declared within the loops. + if (DRE->refersToEnclosingVariableOrCapture()) { (void)LoopScope.addPrivate(VD, [this, DRE, VD]() { return CreateMemTemp(DRE->getType(), VD->getName()); }); diff --git a/lib/Driver/ToolChains/Clang.cpp b/lib/Driver/ToolChains/Clang.cpp index 589f53b1192..78ee7a78176 100644 --- a/lib/Driver/ToolChains/Clang.cpp +++ b/lib/Driver/ToolChains/Clang.cpp @@ -718,8 +718,9 @@ static void appendUserToPath(SmallVectorImpl &Result) { Result.append(UID.begin(), UID.end()); } -static void addPGOAndCoverageFlags(Compilation &C, const Driver &D, - const InputInfo &Output, const ArgList &Args, +static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C, + const Driver &D, const InputInfo &Output, + const ArgList &Args, ArgStringList &CmdArgs) { auto *PGOGenerateArg = Args.getLastArg(options::OPT_fprofile_generate, @@ -759,6 +760,11 @@ static void addPGOAndCoverageFlags(Compilation &C, const Driver &D, ProfileGenerateArg->getValue())); // The default is to use Clang Instrumentation. CmdArgs.push_back("-fprofile-instrument=clang"); + if (TC.getTriple().isWindowsMSVCEnvironment()) { + // Add dependent lib for clang_rt.profile + CmdArgs.push_back(Args.MakeArgString("--dependent-lib=" + + TC.getCompilerRT(Args, "profile"))); + } } if (PGOGenerateArg) { @@ -4118,7 +4124,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // sampling, overhead of call arc collection is way too high and there's no // way to collect the output. if (!Triple.isNVPTX()) - addPGOAndCoverageFlags(C, D, Output, Args, CmdArgs); + addPGOAndCoverageFlags(TC, C, D, Output, Args, CmdArgs); if (auto *ABICompatArg = Args.getLastArg(options::OPT_fclang_abi_compat_EQ)) ABICompatArg->render(Args, CmdArgs); diff --git a/lib/Driver/ToolChains/Linux.cpp b/lib/Driver/ToolChains/Linux.cpp index 65ab9b2daf5..dfdfb18319a 100644 --- a/lib/Driver/ToolChains/Linux.cpp +++ b/lib/Driver/ToolChains/Linux.cpp @@ -45,6 +45,7 @@ static std::string getMultiarchTriple(const Driver &D, TargetTriple.getEnvironment(); bool IsAndroid = TargetTriple.isAndroid(); bool IsMipsR6 = TargetTriple.getSubArch() == llvm::Triple::MipsSubArch_r6; + bool IsMipsN32Abi = TargetTriple.getEnvironment() == llvm::Triple::GNUABIN32; // For most architectures, just use whatever we have rather than trying to be // clever. @@ -103,33 +104,37 @@ static std::string getMultiarchTriple(const Driver &D, return "aarch64_be-linux-gnu"; break; case llvm::Triple::mips: { - std::string Arch = IsMipsR6 ? "mipsisa32r6" : "mips"; - if (D.getVFS().exists(SysRoot + "/lib/" + Arch + "-linux-gnu")) - return Arch + "-linux-gnu"; + std::string MT = IsMipsR6 ? "mipsisa32r6-linux-gnu" : "mips-linux-gnu"; + if (D.getVFS().exists(SysRoot + "/lib/" + MT)) + return MT; break; } case llvm::Triple::mipsel: { if (IsAndroid) return "mipsel-linux-android"; - std::string Arch = IsMipsR6 ? "mipsisa32r6el" : "mipsel"; - if (D.getVFS().exists(SysRoot + "/lib/" + Arch + "-linux-gnu")) - return Arch + "-linux-gnu"; + std::string MT = IsMipsR6 ? "mipsisa32r6el-linux-gnu" : "mipsel-linux-gnu"; + if (D.getVFS().exists(SysRoot + "/lib/" + MT)) + return MT; break; } case llvm::Triple::mips64: { - std::string Arch = IsMipsR6 ? "mipsisa64r6" : "mips64"; - std::string ABI = llvm::Triple::getEnvironmentTypeName(TargetEnvironment); - if (D.getVFS().exists(SysRoot + "/lib/" + Arch + "-linux-" + ABI)) - return Arch + "-linux-" + ABI; + std::string MT = std::string(IsMipsR6 ? "mipsisa64r6" : "mips64") + + "-linux-" + (IsMipsN32Abi ? "gnuabin32" : "gnuabi64"); + if (D.getVFS().exists(SysRoot + "/lib/" + MT)) + return MT; + if (D.getVFS().exists(SysRoot + "/lib/mips64-linux-gnu")) + return "mips64-linux-gnu"; break; } case llvm::Triple::mips64el: { if (IsAndroid) return "mips64el-linux-android"; - std::string Arch = IsMipsR6 ? "mipsisa64r6el" : "mips64el"; - std::string ABI = llvm::Triple::getEnvironmentTypeName(TargetEnvironment); - if (D.getVFS().exists(SysRoot + "/lib/" + Arch + "-linux-" + ABI)) - return Arch + "-linux-" + ABI; + std::string MT = std::string(IsMipsR6 ? "mipsisa64r6el" : "mips64el") + + "-linux-" + (IsMipsN32Abi ? "gnuabin32" : "gnuabi64"); + if (D.getVFS().exists(SysRoot + "/lib/" + MT)) + return MT; + if (D.getVFS().exists(SysRoot + "/lib/mips64el-linux-gnu")) + return "mips64el-linux-gnu"; break; } case llvm::Triple::ppc: diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index aedec746af9..8a0be0c472d 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -4602,8 +4602,7 @@ DeclRefExpr *OpenMPIterationSpaceChecker::buildCounterVar( Captures.insert(std::make_pair(LCRef, Ref)); return Ref; } - return buildDeclRefExpr(SemaRef, VD, VD->getType().getNonReferenceType(), - DefaultLoc); + return cast(LCRef); } Expr *OpenMPIterationSpaceChecker::buildPrivateCounterVar() const { diff --git a/test/Driver/cl-options.c b/test/Driver/cl-options.c index 909e391cec6..5048fd25c4a 100644 --- a/test/Driver/cl-options.c +++ b/test/Driver/cl-options.c @@ -66,7 +66,7 @@ // RUN: %clang_cl -### /FA -fprofile-instr-generate=/tmp/somefile.profraw -- %s 2>&1 | FileCheck -check-prefix=CHECK-PROFILE-GENERATE-FILE %s // RUN: %clang_cl -### /FA -fprofile-instr-generate -fprofile-instr-use -- %s 2>&1 | FileCheck -check-prefix=CHECK-NO-MIX-GEN-USE %s // RUN: %clang_cl -### /FA -fprofile-instr-generate -fprofile-instr-use=file -- %s 2>&1 | FileCheck -check-prefix=CHECK-NO-MIX-GEN-USE %s -// CHECK-PROFILE-GENERATE: "-fprofile-instrument=clang" +// CHECK-PROFILE-GENERATE: "-fprofile-instrument=clang" "--dependent-lib={{[^"]*}}clang_rt.profile-{{[^"]*}}.lib" // CHECK-PROFILE-GENERATE-FILE: "-fprofile-instrument-path=/tmp/somefile.profraw" // CHECK-NO-MIX-GEN-USE: '{{[a-z=-]*}}' not allowed with '{{[a-z=-]*}}' diff --git a/test/Driver/instrprof-ld.c b/test/Driver/instrprof-ld.c index ea201056997..1ac3f9650ff 100644 --- a/test/Driver/instrprof-ld.c +++ b/test/Driver/instrprof-ld.c @@ -121,3 +121,17 @@ // // CHECK-WINDOWS-X86-64: "{{.*}}link{{(.exe)?}}" // CHECK-WINDOWS-X86-64: "{{.*}}clang_rt.profile-x86_64.lib" + +// Test instrumented profiling dependent-lib flags +// +// RUN: %clang %s -### -o %t.o -target x86_64-pc-win32 \ +// RUN: -fprofile-instr-generate 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-WINDOWS-X86-64-DEPENDENT-LIB %s +// +// CHECK-WINDOWS-X86-64-DEPENDENT-LIB: "--dependent-lib={{[^"]*}}clang_rt.profile-{{[^"]*}}.lib" +// +// RUN: %clang %s -### -o %t.o -target x86_64-mingw32 \ +// RUN: -fprofile-instr-generate 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-MINGW-X86-64-DEPENDENT-LIB %s +// +// CHECK-MINGW-X86-64-DEPENDENT-LIB-NOT: "--dependent-lib={{[^"]*}}clang_rt.profile-{{[^"]*}}.a" diff --git a/test/Driver/linux-ld.c b/test/Driver/linux-ld.c index 3ab81be4906..a8173478029 100644 --- a/test/Driver/linux-ld.c +++ b/test/Driver/linux-ld.c @@ -1632,6 +1632,11 @@ // CHECK-DEBIAN-ML-MIPS64EL-N32: "-L[[SYSROOT]]/usr/lib" // // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +// RUN: --target=mips64-unknown-linux-gnu \ +// RUN: --gcc-toolchain="" \ +// RUN: --sysroot=%S/Inputs/debian_6_mips64_tree \ +// RUN: | FileCheck --check-prefix=CHECK-DEBIAN-ML-MIPS64-GNUABI %s +// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ // RUN: --target=mips64-linux-gnuabi64 -mabi=n64 \ // RUN: --gcc-toolchain="" \ // RUN: --sysroot=%S/Inputs/debian_6_mips64_tree \ @@ -1652,6 +1657,11 @@ // CHECK-DEBIAN-ML-MIPS64-GNUABI: "{{.*}}/usr/lib/gcc/mips64-linux-gnuabi64/4.9/../../../mips64-linux-gnuabi64{{/|\\\\}}crtn.o" // // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +// RUN: --target=mips64el-unknown-linux-gnu \ +// RUN: --gcc-toolchain="" \ +// RUN: --sysroot=%S/Inputs/debian_6_mips64_tree \ +// RUN: | FileCheck --check-prefix=CHECK-DEBIAN-ML-MIPS64EL-GNUABI %s +// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ // RUN: --target=mips64el-linux-gnuabi64 -mabi=n64 \ // RUN: --gcc-toolchain="" \ // RUN: --sysroot=%S/Inputs/debian_6_mips64_tree \ diff --git a/test/OpenMP/ordered_doacross_codegen.cpp b/test/OpenMP/ordered_doacross_codegen.cpp index 2f19e9c2d5e..a3abf248d76 100644 --- a/test/OpenMP/ordered_doacross_codegen.cpp +++ b/test/OpenMP/ordered_doacross_codegen.cpp @@ -16,6 +16,17 @@ extern int n; int a[10], b[10], c[10], d[10]; void foo(); +// CHECK-LABEL:bar +void bar() { + int i,j; +// CHECK: call void @__kmpc_doacross_init( +// CHECK: call void @__kmpc_doacross_fini( +#pragma omp parallel for ordered(2) + for (i = 0; i < n; ++i) + for (j = 0; j < n; ++j) + a[i] = b[i] + 1; +} + // CHECK-LABEL: @main() int main() { int i; @@ -35,7 +46,7 @@ int main() { // CHECK: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 1, i8* [[CAST]]) // CHECK: call void @__kmpc_for_static_init_4( #pragma omp for ordered(1) - for (i = 0; i < n; ++i) { + for (int i = 0; i < n; ++i) { a[i] = b[i] + 1; foo(); // CHECK: invoke void [[FOO:.+]]( From 90579156da340660a42e22e794a3c03ac1b50112 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Tue, 11 Jun 2019 18:16:59 +0000 Subject: [PATCH 3/7] Vendor import of compiler-rt release_80 branch r363030: https://llvm.org/svn/llvm-project/compiler-rt/branches/release_80@363030 --- cmake/base-config-ix.cmake | 4 ++-- lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc | 2 ++ lib/xray/tests/CMakeLists.txt | 5 +++-- test/builtins/Unit/compiler_rt_logb_test.c | 7 +++++++ 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/cmake/base-config-ix.cmake b/cmake/base-config-ix.cmake index 6684d7371d6..aeabf17653f 100644 --- a/cmake/base-config-ix.cmake +++ b/cmake/base-config-ix.cmake @@ -195,10 +195,10 @@ macro(test_targets) # clang's default CPU's. In the 64-bit case, we must also specify the ABI # since the default ABI differs between gcc and clang. # FIXME: Ideally, we would build the N32 library too. - test_target_arch(mipsel "" "-mips32r2" "-mabi=32") + test_target_arch(mipsel "" "-mips32r2" "-mabi=32" "-D_LARGEFILE_SOURCE" "-D_FILE_OFFSET_BITS=64") test_target_arch(mips64el "" "-mips64r2" "-mabi=64") elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "mips") - test_target_arch(mips "" "-mips32r2" "-mabi=32") + test_target_arch(mips "" "-mips32r2" "-mabi=32" "-D_LARGEFILE_SOURCE" "-D_FILE_OFFSET_BITS=64") test_target_arch(mips64 "" "-mips64r2" "-mabi=64") elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "arm") if(WIN32) diff --git a/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc b/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc index c112e044b1d..c32c80b3e48 100644 --- a/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc +++ b/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc @@ -124,7 +124,9 @@ #include #include #include +#if 0 #include +#endif #include #include #include diff --git a/lib/xray/tests/CMakeLists.txt b/lib/xray/tests/CMakeLists.txt index deddc5101e7..14585290454 100644 --- a/lib/xray/tests/CMakeLists.txt +++ b/lib/xray/tests/CMakeLists.txt @@ -48,8 +48,7 @@ endfunction() set(XRAY_TEST_ARCH ${XRAY_SUPPORTED_ARCH}) set(XRAY_UNITTEST_LINK_FLAGS - ${CMAKE_THREAD_LIBS_INIT} - -l${SANITIZER_CXX_ABI_LIBRARY}) + ${CMAKE_THREAD_LIBS_INIT}) if (NOT APPLE) # Needed by LLVMSupport. @@ -81,6 +80,8 @@ if (NOT APPLE) append_list_if(COMPILER_RT_HAS_LIBEXECINFO -lexecinfo XRAY_UNITTEST_LINK_FLAGS) endif() +list(APPEND XRAY_UNITTEST_LINK_FLAGS -l${SANITIZER_CXX_ABI_LIBRARY}) + macro(add_xray_unittest testname) cmake_parse_arguments(TEST "" "" "SOURCES;HEADERS" ${ARGN}) if(UNIX AND NOT APPLE) diff --git a/test/builtins/Unit/compiler_rt_logb_test.c b/test/builtins/Unit/compiler_rt_logb_test.c index 79676598089..9625881316b 100644 --- a/test/builtins/Unit/compiler_rt_logb_test.c +++ b/test/builtins/Unit/compiler_rt_logb_test.c @@ -37,6 +37,10 @@ double cases[] = { }; int main() { + // Do not the run the compiler-rt logb test case if using GLIBC version + // < 2.23. Older versions might not compute to the same value as the + // compiler-rt value. +#if !defined(__GLIBC__) || (defined(__GLIBC_PREREQ) && __GLIBC_PREREQ(2, 23)) const unsigned N = sizeof(cases) / sizeof(cases[0]); unsigned i; for (i = 0; i < N; ++i) { @@ -58,6 +62,9 @@ int main() { if (test__compiler_rt_logb(fromRep(signBit ^ x))) return 1; x >>= 1; } +#else + printf("skipped\n"); +#endif return 0; } From a556a0efb19f863ac71333b3e1bc22d07f4ae89f Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Tue, 11 Jun 2019 18:17:11 +0000 Subject: [PATCH 4/7] Vendor import of libc++ release_80 branch r363030: https://llvm.org/svn/llvm-project/libcxx/branches/release_80@363030 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3a6c3c9df4d..6b83bce1ae7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,7 +27,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) project(libcxx CXX C) set(PACKAGE_NAME libcxx) - set(PACKAGE_VERSION 8.0.0) + set(PACKAGE_VERSION 8.0.1) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org") From 2bcfbb511a63150d7b74d910c8dbda707d78a5e9 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Tue, 11 Jun 2019 18:17:16 +0000 Subject: [PATCH 5/7] Vendor import of LLVM libunwind release_80 branch r363030: https://llvm.org/svn/llvm-project/libunwind/branches/release_80@363030 --- src/UnwindRegistersRestore.S | 232 +++++++++++++++---------------- src/UnwindRegistersSave.S | 256 +++++++++++++++++------------------ src/assembly.h | 2 - 3 files changed, 244 insertions(+), 246 deletions(-) diff --git a/src/UnwindRegistersRestore.S b/src/UnwindRegistersRestore.S index 389db67579c..a155fbe2ddf 100644 --- a/src/UnwindRegistersRestore.S +++ b/src/UnwindRegistersRestore.S @@ -396,119 +396,119 @@ Lnovec: #elif defined(__ppc__) DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_ppc6jumptoEv) -; -; void libunwind::Registers_ppc::jumpto() -; -; On entry: -; thread_state pointer is in r3 -; +// +// void libunwind::Registers_ppc::jumpto() +// +// On entry: +// thread_state pointer is in r3 +// - ; restore integral registerrs - ; skip r0 for now - ; skip r1 for now - lwz r2, 16(r3) - ; skip r3 for now - ; skip r4 for now - ; skip r5 for now - lwz r6, 32(r3) - lwz r7, 36(r3) - lwz r8, 40(r3) - lwz r9, 44(r3) - lwz r10, 48(r3) - lwz r11, 52(r3) - lwz r12, 56(r3) - lwz r13, 60(r3) - lwz r14, 64(r3) - lwz r15, 68(r3) - lwz r16, 72(r3) - lwz r17, 76(r3) - lwz r18, 80(r3) - lwz r19, 84(r3) - lwz r20, 88(r3) - lwz r21, 92(r3) - lwz r22, 96(r3) - lwz r23,100(r3) - lwz r24,104(r3) - lwz r25,108(r3) - lwz r26,112(r3) - lwz r27,116(r3) - lwz r28,120(r3) - lwz r29,124(r3) - lwz r30,128(r3) - lwz r31,132(r3) + // restore integral registerrs + // skip r0 for now + // skip r1 for now + lwz %r2, 16(%r3) + // skip r3 for now + // skip r4 for now + // skip r5 for now + lwz %r6, 32(%r3) + lwz %r7, 36(%r3) + lwz %r8, 40(%r3) + lwz %r9, 44(%r3) + lwz %r10, 48(%r3) + lwz %r11, 52(%r3) + lwz %r12, 56(%r3) + lwz %r13, 60(%r3) + lwz %r14, 64(%r3) + lwz %r15, 68(%r3) + lwz %r16, 72(%r3) + lwz %r17, 76(%r3) + lwz %r18, 80(%r3) + lwz %r19, 84(%r3) + lwz %r20, 88(%r3) + lwz %r21, 92(%r3) + lwz %r22, 96(%r3) + lwz %r23,100(%r3) + lwz %r24,104(%r3) + lwz %r25,108(%r3) + lwz %r26,112(%r3) + lwz %r27,116(%r3) + lwz %r28,120(%r3) + lwz %r29,124(%r3) + lwz %r30,128(%r3) + lwz %r31,132(%r3) - ; restore float registers - lfd f0, 160(r3) - lfd f1, 168(r3) - lfd f2, 176(r3) - lfd f3, 184(r3) - lfd f4, 192(r3) - lfd f5, 200(r3) - lfd f6, 208(r3) - lfd f7, 216(r3) - lfd f8, 224(r3) - lfd f9, 232(r3) - lfd f10,240(r3) - lfd f11,248(r3) - lfd f12,256(r3) - lfd f13,264(r3) - lfd f14,272(r3) - lfd f15,280(r3) - lfd f16,288(r3) - lfd f17,296(r3) - lfd f18,304(r3) - lfd f19,312(r3) - lfd f20,320(r3) - lfd f21,328(r3) - lfd f22,336(r3) - lfd f23,344(r3) - lfd f24,352(r3) - lfd f25,360(r3) - lfd f26,368(r3) - lfd f27,376(r3) - lfd f28,384(r3) - lfd f29,392(r3) - lfd f30,400(r3) - lfd f31,408(r3) + // restore float registers + lfd %f0, 160(%r3) + lfd %f1, 168(%r3) + lfd %f2, 176(%r3) + lfd %f3, 184(%r3) + lfd %f4, 192(%r3) + lfd %f5, 200(%r3) + lfd %f6, 208(%r3) + lfd %f7, 216(%r3) + lfd %f8, 224(%r3) + lfd %f9, 232(%r3) + lfd %f10,240(%r3) + lfd %f11,248(%r3) + lfd %f12,256(%r3) + lfd %f13,264(%r3) + lfd %f14,272(%r3) + lfd %f15,280(%r3) + lfd %f16,288(%r3) + lfd %f17,296(%r3) + lfd %f18,304(%r3) + lfd %f19,312(%r3) + lfd %f20,320(%r3) + lfd %f21,328(%r3) + lfd %f22,336(%r3) + lfd %f23,344(%r3) + lfd %f24,352(%r3) + lfd %f25,360(%r3) + lfd %f26,368(%r3) + lfd %f27,376(%r3) + lfd %f28,384(%r3) + lfd %f29,392(%r3) + lfd %f30,400(%r3) + lfd %f31,408(%r3) - ; restore vector registers if any are in use - lwz r5,156(r3) ; test VRsave - cmpwi r5,0 - beq Lnovec - - subi r4,r1,16 - rlwinm r4,r4,0,0,27 ; mask low 4-bits - ; r4 is now a 16-byte aligned pointer into the red zone - ; the _vectorRegisters may not be 16-byte aligned so copy via red zone temp buffer + // restore vector registers if any are in use + lwz %r5, 156(%r3) // test VRsave + cmpwi %r5, 0 + beq Lnovec + subi %r4, %r1, 16 + rlwinm %r4, %r4, 0, 0, 27 // mask low 4-bits + // r4 is now a 16-byte aligned pointer into the red zone + // the _vectorRegisters may not be 16-byte aligned so copy via red zone temp buffer + #define LOAD_VECTOR_UNALIGNEDl(_index) \ - andis. r0,r5,(1<<(15-_index)) @\ - beq Ldone ## _index @\ - lwz r0, 424+_index*16(r3) @\ - stw r0, 0(r4) @\ - lwz r0, 424+_index*16+4(r3) @\ - stw r0, 4(r4) @\ - lwz r0, 424+_index*16+8(r3) @\ - stw r0, 8(r4) @\ - lwz r0, 424+_index*16+12(r3)@\ - stw r0, 12(r4) @\ - lvx v ## _index,0,r4 @\ -Ldone ## _index: + andis. %r0, %r5, (1<<(15-_index)) SEPARATOR \ + beq Ldone ## _index SEPARATOR \ + lwz %r0, 424+_index*16(%r3) SEPARATOR \ + stw %r0, 0(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+4(%r3) SEPARATOR \ + stw %r0, 4(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+8(%r3) SEPARATOR \ + stw %r0, 8(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+12(%r3) SEPARATOR \ + stw %r0, 12(%r4) SEPARATOR \ + lvx %v ## _index, 0, %r4 SEPARATOR \ + Ldone ## _index: #define LOAD_VECTOR_UNALIGNEDh(_index) \ - andi. r0,r5,(1<<(31-_index)) @\ - beq Ldone ## _index @\ - lwz r0, 424+_index*16(r3) @\ - stw r0, 0(r4) @\ - lwz r0, 424+_index*16+4(r3) @\ - stw r0, 4(r4) @\ - lwz r0, 424+_index*16+8(r3) @\ - stw r0, 8(r4) @\ - lwz r0, 424+_index*16+12(r3)@\ - stw r0, 12(r4) @\ - lvx v ## _index,0,r4 @\ - Ldone ## _index: + andi. %r0, %r5, (1<<(31-_index)) SEPARATOR \ + beq Ldone ## _index SEPARATOR \ + lwz %r0, 424+_index*16(%r3) SEPARATOR \ + stw %r0, 0(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+4(%r3) SEPARATOR \ + stw %r0, 4(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+8(%r3) SEPARATOR \ + stw %r0, 8(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+12(%r3) SEPARATOR \ + stw %r0, 12(%r4) SEPARATOR \ + lvx %v ## _index, 0, %r4 SEPARATOR \ + Ldone ## _index: LOAD_VECTOR_UNALIGNEDl(0) @@ -545,17 +545,17 @@ Ldone ## _index: LOAD_VECTOR_UNALIGNEDh(31) Lnovec: - lwz r0, 136(r3) ; __cr - mtocrf 255,r0 - lwz r0, 148(r3) ; __ctr - mtctr r0 - lwz r0, 0(r3) ; __ssr0 - mtctr r0 - lwz r0, 8(r3) ; do r0 now - lwz r5,28(r3) ; do r5 now - lwz r4,24(r3) ; do r4 now - lwz r1,12(r3) ; do sp now - lwz r3,20(r3) ; do r3 last + lwz %r0, 136(%r3) // __cr + mtcr %r0 + lwz %r0, 148(%r3) // __ctr + mtctr %r0 + lwz %r0, 0(%r3) // __ssr0 + mtctr %r0 + lwz %r0, 8(%r3) // do r0 now + lwz %r5, 28(%r3) // do r5 now + lwz %r4, 24(%r3) // do r4 now + lwz %r1, 12(%r3) // do sp now + lwz %r3, 20(%r3) // do r3 last bctr #elif defined(__arm64__) || defined(__aarch64__) diff --git a/src/UnwindRegistersSave.S b/src/UnwindRegistersSave.S index 48ecb0aec70..4b674afc6bd 100644 --- a/src/UnwindRegistersSave.S +++ b/src/UnwindRegistersSave.S @@ -557,144 +557,144 @@ DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) #elif defined(__ppc__) -; -; extern int unw_getcontext(unw_context_t* thread_state) -; -; On entry: -; thread_state pointer is in r3 -; +// +// extern int unw_getcontext(unw_context_t* thread_state) +// +// On entry: +// thread_state pointer is in r3 +// DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) - stw r0, 8(r3) - mflr r0 - stw r0, 0(r3) ; store lr as ssr0 - stw r1, 12(r3) - stw r2, 16(r3) - stw r3, 20(r3) - stw r4, 24(r3) - stw r5, 28(r3) - stw r6, 32(r3) - stw r7, 36(r3) - stw r8, 40(r3) - stw r9, 44(r3) - stw r10, 48(r3) - stw r11, 52(r3) - stw r12, 56(r3) - stw r13, 60(r3) - stw r14, 64(r3) - stw r15, 68(r3) - stw r16, 72(r3) - stw r17, 76(r3) - stw r18, 80(r3) - stw r19, 84(r3) - stw r20, 88(r3) - stw r21, 92(r3) - stw r22, 96(r3) - stw r23,100(r3) - stw r24,104(r3) - stw r25,108(r3) - stw r26,112(r3) - stw r27,116(r3) - stw r28,120(r3) - stw r29,124(r3) - stw r30,128(r3) - stw r31,132(r3) + stw %r0, 8(%r3) + mflr %r0 + stw %r0, 0(%r3) // store lr as ssr0 + stw %r1, 12(%r3) + stw %r2, 16(%r3) + stw %r3, 20(%r3) + stw %r4, 24(%r3) + stw %r5, 28(%r3) + stw %r6, 32(%r3) + stw %r7, 36(%r3) + stw %r8, 40(%r3) + stw %r9, 44(%r3) + stw %r10, 48(%r3) + stw %r11, 52(%r3) + stw %r12, 56(%r3) + stw %r13, 60(%r3) + stw %r14, 64(%r3) + stw %r15, 68(%r3) + stw %r16, 72(%r3) + stw %r17, 76(%r3) + stw %r18, 80(%r3) + stw %r19, 84(%r3) + stw %r20, 88(%r3) + stw %r21, 92(%r3) + stw %r22, 96(%r3) + stw %r23,100(%r3) + stw %r24,104(%r3) + stw %r25,108(%r3) + stw %r26,112(%r3) + stw %r27,116(%r3) + stw %r28,120(%r3) + stw %r29,124(%r3) + stw %r30,128(%r3) + stw %r31,132(%r3) - ; save VRSave register - mfspr r0,256 - stw r0,156(r3) - ; save CR registers - mfcr r0 - stw r0,136(r3) - ; save CTR register - mfctr r0 - stw r0,148(r3) + // save VRSave register + mfspr %r0, 256 + stw %r0, 156(%r3) + // save CR registers + mfcr %r0 + stw %r0, 136(%r3) + // save CTR register + mfctr %r0 + stw %r0, 148(%r3) - ; save float registers - stfd f0, 160(r3) - stfd f1, 168(r3) - stfd f2, 176(r3) - stfd f3, 184(r3) - stfd f4, 192(r3) - stfd f5, 200(r3) - stfd f6, 208(r3) - stfd f7, 216(r3) - stfd f8, 224(r3) - stfd f9, 232(r3) - stfd f10,240(r3) - stfd f11,248(r3) - stfd f12,256(r3) - stfd f13,264(r3) - stfd f14,272(r3) - stfd f15,280(r3) - stfd f16,288(r3) - stfd f17,296(r3) - stfd f18,304(r3) - stfd f19,312(r3) - stfd f20,320(r3) - stfd f21,328(r3) - stfd f22,336(r3) - stfd f23,344(r3) - stfd f24,352(r3) - stfd f25,360(r3) - stfd f26,368(r3) - stfd f27,376(r3) - stfd f28,384(r3) - stfd f29,392(r3) - stfd f30,400(r3) - stfd f31,408(r3) + // save float registers + stfd %f0, 160(%r3) + stfd %f1, 168(%r3) + stfd %f2, 176(%r3) + stfd %f3, 184(%r3) + stfd %f4, 192(%r3) + stfd %f5, 200(%r3) + stfd %f6, 208(%r3) + stfd %f7, 216(%r3) + stfd %f8, 224(%r3) + stfd %f9, 232(%r3) + stfd %f10,240(%r3) + stfd %f11,248(%r3) + stfd %f12,256(%r3) + stfd %f13,264(%r3) + stfd %f14,272(%r3) + stfd %f15,280(%r3) + stfd %f16,288(%r3) + stfd %f17,296(%r3) + stfd %f18,304(%r3) + stfd %f19,312(%r3) + stfd %f20,320(%r3) + stfd %f21,328(%r3) + stfd %f22,336(%r3) + stfd %f23,344(%r3) + stfd %f24,352(%r3) + stfd %f25,360(%r3) + stfd %f26,368(%r3) + stfd %f27,376(%r3) + stfd %f28,384(%r3) + stfd %f29,392(%r3) + stfd %f30,400(%r3) + stfd %f31,408(%r3) - ; save vector registers + // save vector registers - subi r4,r1,16 - rlwinm r4,r4,0,0,27 ; mask low 4-bits - ; r4 is now a 16-byte aligned pointer into the red zone + subi %r4, %r1, 16 + rlwinm %r4, %r4, 0, 0, 27 // mask low 4-bits + // r4 is now a 16-byte aligned pointer into the red zone #define SAVE_VECTOR_UNALIGNED(_vec, _offset) \ - stvx _vec,0,r4 @\ - lwz r5, 0(r4) @\ - stw r5, _offset(r3) @\ - lwz r5, 4(r4) @\ - stw r5, _offset+4(r3) @\ - lwz r5, 8(r4) @\ - stw r5, _offset+8(r3) @\ - lwz r5, 12(r4) @\ - stw r5, _offset+12(r3) + stvx _vec, 0, %r4 SEPARATOR \ + lwz %r5, 0(%r4) SEPARATOR \ + stw %r5, _offset(%r3) SEPARATOR \ + lwz %r5, 4(%r4) SEPARATOR \ + stw %r5, _offset+4(%r3) SEPARATOR \ + lwz %r5, 8(%r4) SEPARATOR \ + stw %r5, _offset+8(%r3) SEPARATOR \ + lwz %r5, 12(%r4) SEPARATOR \ + stw %r5, _offset+12(%r3) - SAVE_VECTOR_UNALIGNED( v0, 424+0x000) - SAVE_VECTOR_UNALIGNED( v1, 424+0x010) - SAVE_VECTOR_UNALIGNED( v2, 424+0x020) - SAVE_VECTOR_UNALIGNED( v3, 424+0x030) - SAVE_VECTOR_UNALIGNED( v4, 424+0x040) - SAVE_VECTOR_UNALIGNED( v5, 424+0x050) - SAVE_VECTOR_UNALIGNED( v6, 424+0x060) - SAVE_VECTOR_UNALIGNED( v7, 424+0x070) - SAVE_VECTOR_UNALIGNED( v8, 424+0x080) - SAVE_VECTOR_UNALIGNED( v9, 424+0x090) - SAVE_VECTOR_UNALIGNED(v10, 424+0x0A0) - SAVE_VECTOR_UNALIGNED(v11, 424+0x0B0) - SAVE_VECTOR_UNALIGNED(v12, 424+0x0C0) - SAVE_VECTOR_UNALIGNED(v13, 424+0x0D0) - SAVE_VECTOR_UNALIGNED(v14, 424+0x0E0) - SAVE_VECTOR_UNALIGNED(v15, 424+0x0F0) - SAVE_VECTOR_UNALIGNED(v16, 424+0x100) - SAVE_VECTOR_UNALIGNED(v17, 424+0x110) - SAVE_VECTOR_UNALIGNED(v18, 424+0x120) - SAVE_VECTOR_UNALIGNED(v19, 424+0x130) - SAVE_VECTOR_UNALIGNED(v20, 424+0x140) - SAVE_VECTOR_UNALIGNED(v21, 424+0x150) - SAVE_VECTOR_UNALIGNED(v22, 424+0x160) - SAVE_VECTOR_UNALIGNED(v23, 424+0x170) - SAVE_VECTOR_UNALIGNED(v24, 424+0x180) - SAVE_VECTOR_UNALIGNED(v25, 424+0x190) - SAVE_VECTOR_UNALIGNED(v26, 424+0x1A0) - SAVE_VECTOR_UNALIGNED(v27, 424+0x1B0) - SAVE_VECTOR_UNALIGNED(v28, 424+0x1C0) - SAVE_VECTOR_UNALIGNED(v29, 424+0x1D0) - SAVE_VECTOR_UNALIGNED(v30, 424+0x1E0) - SAVE_VECTOR_UNALIGNED(v31, 424+0x1F0) + SAVE_VECTOR_UNALIGNED( %v0, 424+0x000) + SAVE_VECTOR_UNALIGNED( %v1, 424+0x010) + SAVE_VECTOR_UNALIGNED( %v2, 424+0x020) + SAVE_VECTOR_UNALIGNED( %v3, 424+0x030) + SAVE_VECTOR_UNALIGNED( %v4, 424+0x040) + SAVE_VECTOR_UNALIGNED( %v5, 424+0x050) + SAVE_VECTOR_UNALIGNED( %v6, 424+0x060) + SAVE_VECTOR_UNALIGNED( %v7, 424+0x070) + SAVE_VECTOR_UNALIGNED( %v8, 424+0x080) + SAVE_VECTOR_UNALIGNED( %v9, 424+0x090) + SAVE_VECTOR_UNALIGNED(%v10, 424+0x0A0) + SAVE_VECTOR_UNALIGNED(%v11, 424+0x0B0) + SAVE_VECTOR_UNALIGNED(%v12, 424+0x0C0) + SAVE_VECTOR_UNALIGNED(%v13, 424+0x0D0) + SAVE_VECTOR_UNALIGNED(%v14, 424+0x0E0) + SAVE_VECTOR_UNALIGNED(%v15, 424+0x0F0) + SAVE_VECTOR_UNALIGNED(%v16, 424+0x100) + SAVE_VECTOR_UNALIGNED(%v17, 424+0x110) + SAVE_VECTOR_UNALIGNED(%v18, 424+0x120) + SAVE_VECTOR_UNALIGNED(%v19, 424+0x130) + SAVE_VECTOR_UNALIGNED(%v20, 424+0x140) + SAVE_VECTOR_UNALIGNED(%v21, 424+0x150) + SAVE_VECTOR_UNALIGNED(%v22, 424+0x160) + SAVE_VECTOR_UNALIGNED(%v23, 424+0x170) + SAVE_VECTOR_UNALIGNED(%v24, 424+0x180) + SAVE_VECTOR_UNALIGNED(%v25, 424+0x190) + SAVE_VECTOR_UNALIGNED(%v26, 424+0x1A0) + SAVE_VECTOR_UNALIGNED(%v27, 424+0x1B0) + SAVE_VECTOR_UNALIGNED(%v28, 424+0x1C0) + SAVE_VECTOR_UNALIGNED(%v29, 424+0x1D0) + SAVE_VECTOR_UNALIGNED(%v30, 424+0x1E0) + SAVE_VECTOR_UNALIGNED(%v31, 424+0x1F0) - li r3, 0 ; return UNW_ESUCCESS + li %r3, 0 // return UNW_ESUCCESS blr diff --git a/src/assembly.h b/src/assembly.h index 2df930214fa..7806892e9dc 100644 --- a/src/assembly.h +++ b/src/assembly.h @@ -29,8 +29,6 @@ #ifdef _ARCH_PWR8 #define PPC64_HAS_VMX #endif -#elif defined(__POWERPC__) || defined(__powerpc__) || defined(__ppc__) -#define SEPARATOR @ #elif defined(__arm64__) #define SEPARATOR %% #else From 444e4712399dfed9a74a0a1bd4880ea138a86616 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Tue, 11 Jun 2019 18:17:23 +0000 Subject: [PATCH 6/7] Vendor import of lld release_80 branch r363030: https://llvm.org/svn/llvm-project/lld/branches/release_80@363030 --- COFF/Writer.cpp | 50 +++++++-- ELF/Arch/PPC64.cpp | 9 ++ ELF/InputSection.cpp | 6 +- ELF/SyntheticSections.cpp | 5 + ELF/Writer.cpp | 16 +-- test/COFF/guardcf-thunk.s | 43 +++++++ test/ELF/aarch64-cortex-a53-843419-tlsrelax.s | 4 +- test/ELF/aarch64-tls-gdle.s | 4 +- test/ELF/aarch64-tls-iele.s | 6 +- test/ELF/aarch64-tls-le.s | 8 +- test/ELF/aarch64-tlsld-ldst.s | 50 ++++----- test/ELF/arm-tls-le32.s | 12 +- test/ELF/arm-tls-norelax-ie-le.s | 4 +- test/ELF/compressed-input-alignment.test | 67 +++++++++++ test/ELF/eh-frame-hdr-augmentation.s | 4 +- test/ELF/emit-relocs-mergeable2.s | 14 +++ test/ELF/gc-sections-metadata-startstop.s | 2 +- test/ELF/mips-micro-relocs.s | 6 +- test/ELF/mips-micror6-relocs.s | 4 +- test/ELF/ppc64-bsymbolic-toc-restore.s | 4 +- test/ELF/ppc64-call-reach.s | 40 +++---- test/ELF/ppc64-ifunc.s | 106 ++++++++---------- test/ELF/ppc64-local-dynamic.s | 2 +- test/ELF/ppc64-local-entry.s | 47 ++++++++ test/ELF/ppc64-long-branch-init.s | 43 +++++++ test/ELF/ppc64-plt-stub.s | 11 +- test/ELF/ppc64-rel-calls.s | 5 +- test/ELF/ppc64-toc-restore-recursive-call.s | 17 ++- test/ELF/ppc64-toc-restore.s | 41 +++---- wasm/OutputSections.cpp | 12 +- 30 files changed, 433 insertions(+), 209 deletions(-) create mode 100644 test/COFF/guardcf-thunk.s create mode 100644 test/ELF/compressed-input-alignment.test create mode 100644 test/ELF/emit-relocs-mergeable2.s create mode 100644 test/ELF/ppc64-local-entry.s create mode 100644 test/ELF/ppc64-long-branch-init.s diff --git a/COFF/Writer.cpp b/COFF/Writer.cpp index 6acfaf9a445..56b797451cf 100644 --- a/COFF/Writer.cpp +++ b/COFF/Writer.cpp @@ -1351,19 +1351,47 @@ static void addSymbolToRVASet(SymbolRVASet &RVASet, Defined *S) { // symbol in an executable section. static void maybeAddAddressTakenFunction(SymbolRVASet &AddressTakenSyms, Symbol *S) { - auto *D = dyn_cast_or_null(S); - - // Ignore undefined symbols and references to non-functions (e.g. globals and - // labels). - if (!D || - D->getCOFFSymbol().getComplexType() != COFF::IMAGE_SYM_DTYPE_FUNCTION) + if (!S) return; - // Mark the symbol as address taken if it's in an executable section. - Chunk *RefChunk = D->getChunk(); - OutputSection *OS = RefChunk ? RefChunk->getOutputSection() : nullptr; - if (OS && OS->Header.Characteristics & IMAGE_SCN_MEM_EXECUTE) - addSymbolToRVASet(AddressTakenSyms, D); + switch (S->kind()) { + case Symbol::DefinedLocalImportKind: + case Symbol::DefinedImportDataKind: + // Defines an __imp_ pointer, so it is data, so it is ignored. + break; + case Symbol::DefinedCommonKind: + // Common is always data, so it is ignored. + break; + case Symbol::DefinedAbsoluteKind: + case Symbol::DefinedSyntheticKind: + // Absolute is never code, synthetic generally isn't and usually isn't + // determinable. + break; + case Symbol::LazyKind: + case Symbol::UndefinedKind: + // Undefined symbols resolve to zero, so they don't have an RVA. Lazy + // symbols shouldn't have relocations. + break; + + case Symbol::DefinedImportThunkKind: + // Thunks are always code, include them. + addSymbolToRVASet(AddressTakenSyms, cast(S)); + break; + + case Symbol::DefinedRegularKind: { + // This is a regular, defined, symbol from a COFF file. Mark the symbol as + // address taken if the symbol type is function and it's in an executable + // section. + auto *D = cast(S); + if (D->getCOFFSymbol().getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION) { + Chunk *RefChunk = D->getChunk(); + OutputSection *OS = RefChunk ? RefChunk->getOutputSection() : nullptr; + if (OS && OS->Header.Characteristics & IMAGE_SCN_MEM_EXECUTE) + addSymbolToRVASet(AddressTakenSyms, D); + } + break; + } + } } // Visit all relocations from all section contributions of this object file and diff --git a/ELF/Arch/PPC64.cpp b/ELF/Arch/PPC64.cpp index 8a320c9a4e9..cbfa8073d33 100644 --- a/ELF/Arch/PPC64.cpp +++ b/ELF/Arch/PPC64.cpp @@ -113,6 +113,7 @@ class PPC64 final : public TargetInfo { void writeGotHeader(uint8_t *Buf) const override; bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File, uint64_t BranchAddr, const Symbol &S) const override; + uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override; RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const override; @@ -759,6 +760,14 @@ bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File, return !inBranchRange(Type, BranchAddr, S.getVA()); } +uint32_t PPC64::getThunkSectionSpacing() const { + // See comment in Arch/ARM.cpp for a more detailed explanation of + // getThunkSectionSpacing(). For PPC64 we pick the constant here based on + // R_PPC64_REL24, which is used by unconditional branch instructions. + // 0x2000000 = (1 << 24-1) * 4 + return 0x2000000; +} + bool PPC64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const { int64_t Offset = Dst - Src; if (Type == R_PPC64_REL14) diff --git a/ELF/InputSection.cpp b/ELF/InputSection.cpp index 839bff7011e..ca2f49c07bb 100644 --- a/ELF/InputSection.cpp +++ b/ELF/InputSection.cpp @@ -248,6 +248,7 @@ void InputSectionBase::parseCompressedHeader() { } UncompressedSize = Hdr->ch_size; + Alignment = std::max(Hdr->ch_addralign, 1); RawData = RawData.slice(sizeof(*Hdr)); return; } @@ -265,6 +266,7 @@ void InputSectionBase::parseCompressedHeader() { } UncompressedSize = Hdr->ch_size; + Alignment = std::max(Hdr->ch_addralign, 1); RawData = RawData.slice(sizeof(*Hdr)); } @@ -578,10 +580,6 @@ static int64_t getTlsTpOffset() { // Variant 1. The thread pointer points to a TCB with a fixed 2-word size, // followed by a variable amount of alignment padding, followed by the TLS // segment. - // - // NB: While the ARM/AArch64 ABI formally has a 2-word TCB size, lld - // effectively increases the TCB size to 8 words for Android compatibility. - // It accomplishes this by increasing the segment's alignment. return alignTo(Config->Wordsize * 2, Out::TlsPhdr->p_align); case EM_386: case EM_X86_64: diff --git a/ELF/SyntheticSections.cpp b/ELF/SyntheticSections.cpp index b1a3f8bc70a..10675588ebe 100644 --- a/ELF/SyntheticSections.cpp +++ b/ELF/SyntheticSections.cpp @@ -2001,6 +2001,11 @@ template void SymbolTableSection::writeTo(uint8_t *Buf) { ESym->setVisibility(Sym->Visibility); } + // The 3 most significant bits of st_other are used by OpenPOWER ABI. + // See getPPC64GlobalEntryToLocalEntryOffset() for more details. + if (Config->EMachine == EM_PPC64) + ESym->st_other |= Sym->StOther & 0xe0; + ESym->st_name = Ent.StrTabOffset; ESym->st_shndx = getSymSectionIndex(Ent.Sym); diff --git a/ELF/Writer.cpp b/ELF/Writer.cpp index 17f4c7961d3..36ba0253263 100644 --- a/ELF/Writer.cpp +++ b/ELF/Writer.cpp @@ -547,6 +547,11 @@ static bool shouldKeepInSymtab(SectionBase *Sec, StringRef SymName, if (Config->Discard == DiscardPolicy::None) return true; + // If -emit-reloc is given, all symbols including local ones need to be + // copied because they may be referenced by relocations. + if (Config->EmitRelocs) + return true; + // In ELF assembly .L symbols are normally discarded by the assembler. // If the assembler fails to do so, the linker discards them if // * --discard-locals is used. @@ -2192,17 +2197,6 @@ template void Writer::setPhdrs() { } if (P->p_type == PT_TLS && P->p_memsz) { - if (!Config->Shared && - (Config->EMachine == EM_ARM || Config->EMachine == EM_AARCH64)) { - // On ARM/AArch64, reserve extra space (8 words) between the thread - // pointer and an executable's TLS segment by overaligning the segment. - // This reservation is needed for backwards compatibility with Android's - // TCB, which allocates several slots after the thread pointer (e.g. - // TLS_SLOT_STACK_GUARD==5). For simplicity, this overalignment is also - // done on other operating systems. - P->p_align = std::max(P->p_align, Config->Wordsize * 8); - } - // The TLS pointer goes after PT_TLS for variant 2 targets. At least glibc // will align it, so round up the size to make sure the offsets are // correct. diff --git a/test/COFF/guardcf-thunk.s b/test/COFF/guardcf-thunk.s new file mode 100644 index 00000000000..0969c580ac9 --- /dev/null +++ b/test/COFF/guardcf-thunk.s @@ -0,0 +1,43 @@ +# REQUIRES: x86 + +# Make a DLL that exports exportfn1. +# RUN: yaml2obj < %p/Inputs/export.yaml > %t.obj +# RUN: lld-link /out:%t.dll /dll %t.obj /export:exportfn1 /implib:%t.lib + +# Make an obj that takes the address of that exported function. +# RUN: llvm-mc -filetype=obj -triple=x86_64-windows-msvc %s -o %t2.obj +# RUN: lld-link -entry:main -guard:cf %t2.obj %t.lib -nodefaultlib -out:%t.exe +# RUN: llvm-readobj -coff-load-config %t.exe | FileCheck %s + +# Check that the gfids table contains *exactly* two entries, one for exportfn1 +# and one for main. +# CHECK: GuardFidTable [ +# CHECK-NEXT: 0x{{[0-9A-Fa-f]+0$}} +# CHECK-NEXT: 0x{{[0-9A-Fa-f]+0$}} +# CHECK-NEXT: ] + + + .def @feat.00; + .scl 3; + .type 0; + .endef + .globl @feat.00 +@feat.00 = 0x001 + + .section .text,"rx" + .def main; .scl 2; .type 32; .endef + .global main +main: + leaq exportfn1(%rip), %rax + retq + + .section .rdata,"dr" +.globl _load_config_used +_load_config_used: + .long 256 + .fill 124, 1, 0 + .quad __guard_fids_table + .quad __guard_fids_count + .long __guard_flags + .fill 128, 1, 0 + diff --git a/test/ELF/aarch64-cortex-a53-843419-tlsrelax.s b/test/ELF/aarch64-cortex-a53-843419-tlsrelax.s index 2db5c7e36bb..bff72d3725f 100644 --- a/test/ELF/aarch64-cortex-a53-843419-tlsrelax.s +++ b/test/ELF/aarch64-cortex-a53-843419-tlsrelax.s @@ -26,9 +26,9 @@ _start: // CHECK: _start: // CHECK-NEXT: 210ff8: 41 d0 3b d5 mrs x1, TPIDR_EL0 // CHECK-NEXT: 210ffc: 00 00 a0 d2 movz x0, #0, lsl #16 -// CHECK-NEXT: 211000: 01 08 80 f2 movk x1, #64 +// CHECK-NEXT: 211000: 01 02 80 f2 movk x1, #16 // CHECK-NEXT: 211004: 00 00 a0 d2 movz x0, #0, lsl #16 -// CHECK-NEXT: 211008: 01 08 80 f2 movk x1, #64 +// CHECK-NEXT: 211008: 01 02 80 f2 movk x1, #16 // CHECK-NEXT: 21100c: c0 03 5f d6 ret .type v,@object diff --git a/test/ELF/aarch64-tls-gdle.s b/test/ELF/aarch64-tls-gdle.s index 882ec8c1ae1..19fdc1d35c5 100644 --- a/test/ELF/aarch64-tls-gdle.s +++ b/test/ELF/aarch64-tls-gdle.s @@ -9,11 +9,11 @@ #RELOC: Relocations [ #RELOC-NEXT: ] -# TCB size = 64 and foo is first element from TLS register. +# TCB size = 0x16 and foo is first element from TLS register. # CHECK: Disassembly of section .text: # CHECK: _start: # CHECK: 210000: 00 00 a0 d2 movz x0, #0, lsl #16 -# CHECK: 210004: 00 08 80 f2 movk x0, #64 +# CHECK: 210004: 00 02 80 f2 movk x0, #16 # CHECK: 210008: 1f 20 03 d5 nop # CHECK: 21000c: 1f 20 03 d5 nop diff --git a/test/ELF/aarch64-tls-iele.s b/test/ELF/aarch64-tls-iele.s index 0229d6676cd..9fec4ee7dc9 100644 --- a/test/ELF/aarch64-tls-iele.s +++ b/test/ELF/aarch64-tls-iele.s @@ -9,13 +9,13 @@ # RELOC: Relocations [ # RELOC-NEXT: ] -# TCB size = 64 and foo is first element from TLS register. +# TCB size = 0x16 and foo is first element from TLS register. # CHECK: Disassembly of section .text: # CHECK: _start: # CHECK-NEXT: 210000: 00 00 a0 d2 movz x0, #0, lsl #16 -# CHECK-NEXT: 210004: 80 08 80 f2 movk x0, #68 +# CHECK-NEXT: 210004: 80 02 80 f2 movk x0, #20 # CHECK-NEXT: 210008: 00 00 a0 d2 movz x0, #0, lsl #16 -# CHECK-NEXT: 21000c: 00 08 80 f2 movk x0, #64 +# CHECK-NEXT: 21000c: 00 02 80 f2 movk x0, #16 .section .tdata .align 2 diff --git a/test/ELF/aarch64-tls-le.s b/test/ELF/aarch64-tls-le.s index 49c322facb1..eda13758656 100644 --- a/test/ELF/aarch64-tls-le.s +++ b/test/ELF/aarch64-tls-le.s @@ -17,12 +17,12 @@ _start: add x0, x0, :tprel_hi12:v2 add x0, x0, :tprel_lo12_nc:v2 -# TCB size = 64 and foo is first element from TLS register. +# TCB size = 0x16 and foo is first element from TLS register. #CHECK: Disassembly of section .text: #CHECK: _start: #CHECK: 210000: 40 d0 3b d5 mrs x0, TPIDR_EL0 #CHECK: 210004: 00 00 40 91 add x0, x0, #0, lsl #12 -#CHECK: 210008: 00 00 01 91 add x0, x0, #64 +#CHECK: 210008: 00 40 00 91 add x0, x0, #16 #CHECK: 21000c: 40 d0 3b d5 mrs x0, TPIDR_EL0 #CHECK: 210010: 00 fc 7f 91 add x0, x0, #4095, lsl #12 #CHECK: 210014: 00 e0 3f 91 add x0, x0, #4088 @@ -36,9 +36,9 @@ v1: .word 0 .size v1, 4 -# The current offset from the thread pointer is 68. Raise it to just below the +# The current offset from the thread pointer is 20. Raise it to just below the # 24-bit limit. -.space (0xfffff8 - 68) +.space (0xfffff8 - 20) .type v2,@object .globl v2 diff --git a/test/ELF/aarch64-tlsld-ldst.s b/test/ELF/aarch64-tlsld-ldst.s index 8ebdc2f152a..3144ca5d99a 100644 --- a/test/ELF/aarch64-tlsld-ldst.s +++ b/test/ELF/aarch64-tlsld-ldst.s @@ -26,27 +26,27 @@ _start: mrs x8, TPIDR_EL0 // CHECK: _start: // CHECK-NEXT: 210000: 48 d0 3b d5 mrs x8, TPIDR_EL0 -// 0x0 + c40 = 0xc40 = tcb (64-bytes) + var0 -// CHECK-NEXT: 210004: 08 01 40 91 add x8, x8, #0, lsl #12 -// CHECK-NEXT: 210008: 14 11 c3 3d ldr q20, [x8, #3136] -// 0x1000 + 0x850 = 0x1850 = tcb + var1 -// CHECK-NEXT: 21000c: 08 05 40 91 add x8, x8, #1, lsl #12 -// CHECK-NEXT: 210010: 00 29 44 f9 ldr x0, [x8, #2128] -// 0x2000 + 0x458 = 0x2458 = tcb + var2 -// CHECK-NEXT: 210014: 08 09 40 91 add x8, x8, #2, lsl #12 -// CHECK-NEXT: 210018: 00 59 44 b9 ldr w0, [x8, #1112] -// 0x3000 + 0x5c = 0x305c = tcb + var3 -// CHECK-NEXT: 21001c: 08 0d 40 91 add x8, x8, #3, lsl #12 -// CHECK-NEXT: 210020: 00 b9 40 79 ldrh w0, [x8, #92] -// 0x3000 + 0xc5e = 0x3c5e = tcb + var4 -// CHECK-NEXT: 210024: 08 0d 40 91 add x8, x8, #3, lsl #12 -// CHECK-NEXT: 210028: 00 79 71 39 ldrb w0, [x8, #3166] +// 0x0 + c10 = 0xc10 = tcb (16-bytes) + var0 +// CHECK-NEXT: 210004: 08 01 40 91 add x8, x8, #0, lsl #12 +// CHECK-NEXT: 210008: 14 05 c3 3d ldr q20, [x8, #3088] +// 0x1000 + 0x820 = 0x1820 = tcb + var1 +// CHECK-NEXT: 21000c: 08 05 40 91 add x8, x8, #1, lsl #12 +// CHECK-NEXT: 210010: 00 11 44 f9 ldr x0, [x8, #2080] +// 0x2000 + 0x428 = 0x2428 = tcb + var2 +// CHECK-NEXT: 210014: 08 09 40 91 add x8, x8, #2, lsl #12 +// CHECK-NEXT: 210018: 00 29 44 b9 ldr w0, [x8, #1064] +// 0x3000 + 0x2c = 0x302c = tcb + var3 +// CHECK-NEXT: 21001c: 08 0d 40 91 add x8, x8, #3, lsl #12 +// CHECK-NEXT: 210020: 00 59 40 79 ldrh w0, [x8, #44] +// 0x3000 + 0xc2e = 0x32ce = tcb + var4 +// CHECK-NEXT: 210024: 08 0d 40 91 add x8, x8, #3, lsl #12 +// CHECK-NEXT: 210028: 00 b9 70 39 ldrb w0, [x8, #3118] -// CHECK-SYMS: 0000000000000c00 16 TLS GLOBAL DEFAULT 2 var0 -// CHECK-SYMS-NEXT: 0000000000001810 8 TLS GLOBAL DEFAULT 2 var1 -// CHECK-SYMS-NEXT: 0000000000002418 4 TLS GLOBAL DEFAULT 2 var2 -// CHECK-SYMS-NEXT: 000000000000301c 2 TLS GLOBAL DEFAULT 2 var3 -// CHECK-SYMS-NEXT: 0000000000003c1e 1 TLS GLOBAL DEFAULT 2 var4 +// CHECK-SYMS: 0000000000000c00 0 TLS GLOBAL DEFAULT 2 var0 +// CHECK-SYMS-NEXT: 0000000000001810 4 TLS GLOBAL DEFAULT 2 var1 +// CHECK-SYMS-NEXT: 0000000000002418 2 TLS GLOBAL DEFAULT 2 var2 +// CHECK-SYMS-NEXT: 000000000000301c 1 TLS GLOBAL DEFAULT 2 var3 +// CHECK-SYMS-NEXT: 0000000000003c1e 0 TLS GLOBAL DEFAULT 2 var4 .globl var0 .globl var1 @@ -59,12 +59,12 @@ _start: mrs x8, TPIDR_EL0 .type var3,@object .section .tbss,"awT",@nobits - .balign 64 + .balign 16 .space 1024 * 3 var0: .quad 0 .quad 0 - .size var0, 16 + .size var1, 16 .space 1024 * 3 var1: .quad 0 @@ -72,14 +72,14 @@ var1: .space 1024 * 3 var2: .word 0 - .size var2, 4 + .size var1, 4 .space 1024 * 3 var3: .hword 0 - .size var3, 2 + .size var2, 2 .space 1024 * 3 var4: .byte 0 - .size var4, 1 + .size var3, 1 .space 1024 * 3 diff --git a/test/ELF/arm-tls-le32.s b/test/ELF/arm-tls-le32.s index f9a5fa9b2fc..7834dedf1be 100644 --- a/test/ELF/arm-tls-le32.s +++ b/test/ELF/arm-tls-le32.s @@ -69,9 +69,9 @@ x: // CHECK: Disassembly of section .text: // CHECK-NEXT: _start: -// offset of x from Thread pointer = (TcbSize + 0x0 = 0x20) -// CHECK-NEXT: 11000: 20 00 00 00 -// offset of z from Thread pointer = (TcbSize + 0x8 = 0x28) -// CHECK-NEXT: 11004: 28 00 00 00 -// offset of y from Thread pointer = (TcbSize + 0x4 = 0x24) -// CHECK-NEXT: 11008: 24 00 00 00 +// offset of x from Thread pointer = (TcbSize + 0x0 = 0x8) +// CHECK-NEXT: 11000: 08 00 00 00 +// offset of z from Thread pointer = (TcbSize + 0x8 = 0x10) +// CHECK-NEXT: 11004: 10 00 00 00 +// offset of y from Thread pointer = (TcbSize + 0x4 = 0xc) +// CHECK-NEXT: 11008: 0c 00 00 00 diff --git a/test/ELF/arm-tls-norelax-ie-le.s b/test/ELF/arm-tls-norelax-ie-le.s index 11c3e4f5dc1..4a52f547f0a 100644 --- a/test/ELF/arm-tls-norelax-ie-le.s +++ b/test/ELF/arm-tls-norelax-ie-le.s @@ -37,5 +37,5 @@ x2: .type x2, %object // CHECK: Contents of section .got: -// x1 at offset 0x20 from TP, x2 at offset 0x24 from TP. Offsets include TCB size of 0x20 -// CHECK-NEXT: 13064 20000000 24000000 +// x1 at offset 8 from TP, x2 at offset 0xc from TP. Offsets include TCB size of 8 +// CHECK-NEXT: 13064 08000000 0c000000 diff --git a/test/ELF/compressed-input-alignment.test b/test/ELF/compressed-input-alignment.test new file mode 100644 index 00000000000..a1f679034b7 --- /dev/null +++ b/test/ELF/compressed-input-alignment.test @@ -0,0 +1,67 @@ +# REQUIRES: zlib, x86 + +# RUN: yaml2obj -docnum=1 %s -o %t.o +# RUN: ld.lld %t.o %t.o -o %t2 +# RUN: llvm-readobj -sections -section-data %t2 | FileCheck %s + +# RUN: yaml2obj -docnum=2 %s -o %t.o +# RUN: ld.lld %t.o %t.o -o %t2 +# RUN: llvm-readobj -sections -section-data %t2 | FileCheck %s + +# CHECK: Name: .debug_info +# CHECK-NEXT: Type: SHT_PROGBITS +# CHECK-NEXT: Flags [ +# CHECK-NEXT: ] +# CHECK-NEXT: Address: 0x0 +# CHECK-NEXT: Offset: 0xE8 +# CHECK-NEXT: Size: 108 +# CHECK-NEXT: Link: 0 +# CHECK-NEXT: Info: 0 +# CHECK-NEXT: AddressAlignment: 1 +# CHECK-NEXT: EntrySize: 0 +# CHECK-NEXT: SectionData ( +# CHECK-NEXT: 0000: {{.*}} |ABCDEFGHIJKLMNOP| +# CHECK-NEXT: 0010: {{.*}} |QRSTUVWXYZ.ABCDE| +# CHECK-NEXT: 0020: {{.*}} |FGHIJKLMNOPQRSTU| +# CHECK-NEXT: 0030: {{.*}} |VWXYZ.ABCDEFGHIJ| +# CHECK-NEXT: 0040: {{.*}} |KLMNOPQRSTUVWXYZ| +# CHECK-NEXT: 0050: {{.*}} |.ABCDEFGHIJKLMNO| +# CHECK-NEXT: 0060: {{.*}} |PQRSTUVWXYZ.| +# CHECK-NEXT: ) +# CHECK-NEXT: } + +## YAML below is produced from the following code. AddressAlign of .debug_info is 8, +## while compressed header has ch_addralign = 1. LLD had a bug and did not use the +## value of ch_addralign at all. We produced broken section content. +## +## .section .debug_info,"",@progbits +## .string "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +## .string "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .debug_info + Type: SHT_PROGBITS + Flags: [ SHF_COMPRESSED ] + AddressAlign: 0x0000000000000008 + Content: 010000000000000036000000000000000100000000000000789C73747276717573F7F0F4F2F6F1F5F30F080C0A0E090D0B8F888C6270C42D0500ADA00FBF + +## YAML below is the same as above, with a single change: ch_addralign field of the compressed +## header was set to 0. This is allowed by the standard, we have to support it. +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .debug_info + Type: SHT_PROGBITS + Flags: [ SHF_COMPRESSED ] + AddressAlign: 0x0000000000000008 + Content: 010000000000000036000000000000000000000000000000789C73747276717573F7F0F4F2F6F1F5F30F080C0A0E090D0B8F888C6270C42D0500ADA00FBF diff --git a/test/ELF/eh-frame-hdr-augmentation.s b/test/ELF/eh-frame-hdr-augmentation.s index 934f9200a27..c51ea1c4976 100644 --- a/test/ELF/eh-frame-hdr-augmentation.s +++ b/test/ELF/eh-frame-hdr-augmentation.s @@ -11,7 +11,7 @@ // CHECK-NEXT: Code alignment factor: 1 // CHECK-NEXT: Data alignment factor: -8 // CHECK-NEXT: Return address column: 16 -// CHECK-NEXT: Personality Address: 00000dad +// CHECK-NEXT: Personality Address: 0000000000000dad // CHECK-NEXT: Augmentation data: // CHECK: DW_CFA_def_cfa: reg7 +8 @@ -20,7 +20,7 @@ // CHECK-NEXT: DW_CFA_nop: // CHECK: 00000020 00000014 00000024 FDE cie=00000024 pc=00000d98...00000d98 -// CHECK-NEXT: LSDA Address: 00000d8f +// CHECK-NEXT: LSDA Address: 0000000000000d8f // CHECK-NEXT: DW_CFA_nop: // CHECK-NEXT: DW_CFA_nop: // CHECK-NEXT: DW_CFA_nop: diff --git a/test/ELF/emit-relocs-mergeable2.s b/test/ELF/emit-relocs-mergeable2.s new file mode 100644 index 00000000000..22d4cf4238a --- /dev/null +++ b/test/ELF/emit-relocs-mergeable2.s @@ -0,0 +1,14 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o +# RUN: ld.lld --emit-relocs %t.o -o %t.exe +# RUN: llvm-readelf --relocations %t.exe | FileCheck %s + +# CHECK: 0000000000201004 000000010000000b R_X86_64_32S 0000000000200120 .Lfoo + 8 + +.globl _start +_start: + movq .Lfoo+8, %rax +.section .rodata.cst16,"aM",@progbits,16 +.Lfoo: + .quad 0 + .quad 0 diff --git a/test/ELF/gc-sections-metadata-startstop.s b/test/ELF/gc-sections-metadata-startstop.s index ede1899698c..0a1efb7e607 100644 --- a/test/ELF/gc-sections-metadata-startstop.s +++ b/test/ELF/gc-sections-metadata-startstop.s @@ -11,7 +11,7 @@ # CHECK-NOT: yy # CHECK: SYMBOL TABLE: -# CHECK: xx 00000000 __start_xx +# CHECK: xx 00000000 .protected __start_xx # CHECK: w *UND* 00000000 __start_yy .weak __start_xx diff --git a/test/ELF/mips-micro-relocs.s b/test/ELF/mips-micro-relocs.s index b539aa94676..566f6810f6e 100644 --- a/test/ELF/mips-micro-relocs.s +++ b/test/ELF/mips-micro-relocs.s @@ -39,9 +39,9 @@ # EL-NEXT: 20028: 00 00 00 00 nop # EL-NEXT: 2002c: 00 94 e8 ff b -44 -# SYM: 00037ff0 .got 00000000 .hidden _gp -# SYM: 00020000 g F .text 00000000 foo -# SYM: 00020010 .text 00000000 __start +# SYM: 00037ff0 .got 00000000 .hidden _gp +# SYM: 00020000 g F .text 00000000 0x80 foo +# SYM: 00020010 .text 00000000 0x80 __start .text .set micromips diff --git a/test/ELF/mips-micror6-relocs.s b/test/ELF/mips-micror6-relocs.s index ca2c1c064f5..fb450ba5cf3 100644 --- a/test/ELF/mips-micror6-relocs.s +++ b/test/ELF/mips-micror6-relocs.s @@ -26,8 +26,8 @@ # EL-NEXT: 20014: 7f 80 f6 ff beqzc $3, -36 # EL-NEXT: 20018: ff b7 f4 ff balc -24 -# SYM: 00020000 g F .text 00000000 foo -# SYM: 00020010 .text 00000000 __start +# SYM: 00020000 g F .text 00000000 0x80 foo +# SYM: 00020010 .text 00000000 0x80 __start .text .set micromips diff --git a/test/ELF/ppc64-bsymbolic-toc-restore.s b/test/ELF/ppc64-bsymbolic-toc-restore.s index 49d347c4899..d467d22ff7b 100644 --- a/test/ELF/ppc64-bsymbolic-toc-restore.s +++ b/test/ELF/ppc64-bsymbolic-toc-restore.s @@ -53,7 +53,7 @@ caller: # CHECK-LABEL: caller # CHECK: bl .+44 # CHECK-NEXT: mr 31, 3 -# CHECK-NEXT: bl .+67108816 +# CHECK-NEXT: bl .+44 # CHECK-NEXT: ld 2, 24(1) # CHECK-NEXT: add 3, 3, 31 # CHECK-NEXT: addi 1, 1, 32 @@ -63,6 +63,6 @@ caller: # CHECK-EMPTY: # CHECK-NEXT: def: # CHECK-NEXT: addis 2, 12, 2 -# CHECK-NEXT: addi 2, 2, -32636 +# CHECK-NEXT: addi 2, 2, -32616 # CHECK-NEXT: li 3, 55 # CHECK-NEXT: blr diff --git a/test/ELF/ppc64-call-reach.s b/test/ELF/ppc64-call-reach.s index a02bfa82993..b843e7e531c 100644 --- a/test/ELF/ppc64-call-reach.s +++ b/test/ELF/ppc64-call-reach.s @@ -3,16 +3,16 @@ # RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o # RUN: ld.lld --defsym callee=0x12010010 --defsym tail_callee=0x12010020 \ # RUN: %t.o -o %t -# RUN: llvm-objdump -d %t | FileCheck %s +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s # RUN: ld.lld --defsym callee=0x12010010 --defsym tail_callee=0x12010020 \ # RUN: %t.o -o %t -# RUN: llvm-objdump -d %t | FileCheck %s +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s # RUN: ld.lld --defsym callee=0xE010014 --defsym tail_callee=0xE010024 \ # RUN: %t.o -o %t -# RUN: llvm-objdump -d %t | FileCheck --check-prefix=NEGOFFSET %s +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=NEGOFFSET %s # RUN: ld.lld --defsym callee=0x12010018 --defsym tail_callee=0x12010028 \ # RUN: %t.o -o %t -# RUN: llvm-objdump -d %t | FileCheck --check-prefix=THUNK %s +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=THUNK %s # RUN: llvm-readelf --sections %t | FileCheck --check-prefix=BRANCHLT %s # RUN: not ld.lld --defsym callee=0x1001002D --defsym tail_callee=0x1001002F \ # RUN: %t.o -o %t 2>&1 | FileCheck --check-prefix=MISSALIGNED %s @@ -20,16 +20,16 @@ # RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o # RUN: ld.lld --defsym callee=0x12010010 --defsym tail_callee=0x12010020 \ # RUN: %t.o -o %t -# RUN: llvm-objdump -d %t | FileCheck %s +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s # RUN: ld.lld --defsym callee=0x12010010 --defsym tail_callee=0x12010020 \ # RUN: %t.o -o %t -# RUN: llvm-objdump -d %t | FileCheck %s +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s # RUN: ld.lld --defsym callee=0xE010014 --defsym tail_callee=0xE010024 \ # RUN: %t.o -o %t -# RUN: llvm-objdump -d %t | FileCheck --check-prefix=NEGOFFSET %s +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=NEGOFFSET %s # RUN: ld.lld --defsym callee=0x12010018 --defsym tail_callee=0x12010028 \ # RUN: %t.o -o %t -# RUN: llvm-objdump -d %t | FileCheck --check-prefix=THUNK %s +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=THUNK %s # RUN: llvm-readelf --sections %t | FileCheck --check-prefix=BRANCHLT %s # RUN: not ld.lld --defsym callee=0x1001002D --defsym tail_callee=0x1001002F \ # RUN: %t.o -o %t 2>&1 | FileCheck --check-prefix=MISSALIGNED %s @@ -58,37 +58,33 @@ test: # Check that we are branching to the definitions, and not range-extending # thunks. # CHECK-LABEL: test -# CHECK: 10010014: {{.*}} bl .+33554428 -# CHECK: 10010024: {{.*}} b .+33554428 +# CHECK: 10010014: bl .+33554428 +# CHECK: 10010024: b .+33554428 # NEGOFFSET-LABEL: test -# NEGOFFSET: 10010014: {{.*}} bl .+33554432 -# NEGOFFSET: 10010024: {{.*}} b .+33554432 +# NEGOFFSET: 10010014: bl .-33554432 +# NEGOFFSET: 10010024: b .+33554432 + +# THUNK-LABEL: test: +# THUNK: 10010014: bl .+20 +# THUNK: 10010024: b .+20 # .branch_lt[0] # THUNK-LABEL: __long_branch_callee: -# THUNK-NEXT: 10010000: {{.*}} addis 12, 2, -1 +# THUNK-NEXT: 10010028: addis 12, 2, -1 # THUNK-NEXT: ld 12, -32768(12) # THUNK-NEXT: mtctr 12 # THUNK-NEXT: bctr # .branch_lt[1] # THUNK-LABEL: __long_branch_tail_callee: -# THUNK-NEXT: 10010010: {{.*}} addis 12, 2, -1 +# THUNK-NEXT: 10010038: addis 12, 2, -1 # THUNK-NEXT: ld 12, -32760(12) # THUNK-NEXT: mtctr 12 # THUNK-NEXT: bctr -# Each call now branches to a thunk, and although it is printed as positive -# the offset is interpreted as a signed 26 bit value so 67108812 is actually -# -52. -# THUNK-LABEL: test: -# THUNK: 10010034: {{.*}} bl .+67108812 -# THUNK: 10010044: {{.*}} b .+67108812 - # The offset from the TOC to the .branch_lt section is (-1 << 16) - 32768. # Name Type Address Off Size # BRANCHLT: .branch_lt PROGBITS 0000000010020000 020000 000010 # BRANCHLT: .got PROGBITS 0000000010030000 030000 000008 # BRANCHLT-NOT: .plt - diff --git a/test/ELF/ppc64-ifunc.s b/test/ELF/ppc64-ifunc.s index 6f2d3318b9c..32e317f3c05 100644 --- a/test/ELF/ppc64-ifunc.s +++ b/test/ELF/ppc64-ifunc.s @@ -1,79 +1,67 @@ # REQUIRES: ppc # RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o -# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o -# RUN: ld.lld -shared %t2.o -o %t2.so -# RUN: ld.lld %t.o %t2.so -o %t -# RUN: llvm-objdump -D %t | FileCheck %s -# RUN: llvm-readelf -dynamic-table %t | FileCheck --check-prefix=DT %s -# RUN: llvm-readelf -dyn-relocations %t | FileCheck --check-prefix=DYNREL %s +# RUN: ld.lld %t.o -o %t +# RUN: llvm-nm %t | FileCheck --check-prefix=NM %s +# RUN: llvm-readelf -S %t | FileCheck --check-prefix=SECTIONS %s +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s +# RUN: llvm-readelf -r %t | FileCheck --check-prefix=DYNREL %s # RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o -# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o -# RUN: ld.lld -shared %t2.o -o %t2.so -# RUN: ld.lld %t.o %t2.so -o %t -# RUN: llvm-objdump -D %t | FileCheck %s -# RUN: llvm-readelf -dynamic-table %t | FileCheck --check-prefix=DT %s -# RUN: llvm-readelf -dyn-relocations %t | FileCheck --check-prefix=DYNREL %s +# RUN: ld.lld %t.o -o %t +# RUN: llvm-nm %t | FileCheck --check-prefix=NM %s +# RUN: llvm-readelf -S %t | FileCheck --check-prefix=SECTIONS %s +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s +# RUN: llvm-readelf -r %t | FileCheck --check-prefix=DYNREL %s -# CHECK: Disassembly of section .text: +# NM-DAG: 0000000010028000 d .TOC. +# NM-DAG: 0000000010010000 T ifunc +# NM-DAG: 0000000010010004 T ifunc2 -# Tocbase + (0 << 16) + 32560 -# 0x100280e0 + 0 + 32560 = 0x10030010 (.plt[2]) -# CHECK: __plt_foo: -# CHECK-NEXT: std 2, 24(1) -# CHECK-NEXT: addis 12, 2, 0 -# CHECK-NEXT: ld 12, 32560(12) -# CHECK-NEXT: mtctr 12 -# CHECK-NEXT: bctr +# SECTIONS: .plt NOBITS 0000000010030000 -# Tocbase + (0 << 16) + 32568 -# 0x100280e0 + 0 + 32568 = 0x1003018 (.plt[3]) +# __plt_ifunc - . = 0x10010020 - 0x10010010 = 16 +# __plt_ifunc2 - . = 0x10010044 - 0x10010018 = 28 +# CHECK: _start: +# CHECK-NEXT: addis 2, 12, 1 +# CHECK-NEXT: addi 2, 2, 32760 +# CHECK-NEXT: 10010010: bl .+16 +# CHECK-NEXT: ld 2, 24(1) +# CHECK-NEXT: 10010018: bl .+28 +# CHECK-NEXT: ld 2, 24(1) + +# .plt[0] - .TOC. = 0x10030000 - 0x10028000 = (1<<16) - 32768 # CHECK: __plt_ifunc: # CHECK-NEXT: std 2, 24(1) -# CHECK-NEXT: addis 12, 2, 0 -# CHECK-NEXT: ld 12, 32568(12) +# CHECK-NEXT: addis 12, 2, 1 +# CHECK-NEXT: ld 12, -32768(12) # CHECK-NEXT: mtctr 12 # CHECK-NEXT: bctr -# CHECK: ifunc: -# CHECK-NEXT: 10010028: {{.*}} nop +# .plt[1] - .TOC. = 0x10030000+8 - 0x10028000 = (1<<16) - 32760 +# CHECK: __plt_ifunc2: +# CHECK-NEXT: std 2, 24(1) +# CHECK-NEXT: addis 12, 2, 1 +# CHECK-NEXT: ld 12, -32760(12) +# CHECK-NEXT: mtctr 12 +# CHECK-NEXT: bctr -# CHECK: _start: -# CHECK-NEXT: addis 2, 12, 2 -# CHECK-NEXT: addi 2, 2, -32588 -# CHECK-NEXT: bl .+67108812 -# CHECK-NEXT: ld 2, 24(1) -# CHECK-NEXT: bl .+67108824 -# CHECK-NEXT: ld 2, 24(1) - -# Check tocbase -# CHECK: Disassembly of section .got: -# CHECK-NEXT: .got: -# CHECK-NEXT: 100200e0 - -# Check .plt address -# DT_PLTGOT should point to the start of the .plt section. -# DT: 0x0000000000000003 PLTGOT 0x10030000 - -# Check that we emit the correct dynamic relocation type for an ifunc -# DYNREL: 'PLT' relocation section at offset 0x{{[0-9a-f]+}} contains 48 bytes: -# 48 bytes --> 2 Elf64_Rela relocations -# DYNREL-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend -# DYNREL-NEXT: {{[0-9a-f]+}} {{[0-9a-f]+}} R_PPC64_JMP_SLOT {{0+}} foo + 0 -# DYNREL-NEXT: {{[0-9a-f]+}} {{[0-9a-f]+}} R_PPC64_IRELATIVE 10010028 - - - .text - .abiversion 2 +# Check that we emit 2 R_PPC64_IRELATIVE. +# DYNREL: R_PPC64_IRELATIVE 10010000 +# DYNREL: R_PPC64_IRELATIVE 10010004 .type ifunc STT_GNU_IFUNC .globl ifunc ifunc: - nop + nop - .global _start - .type _start,@function +.type ifunc2 STT_GNU_IFUNC +.globl ifunc2 +ifunc2: + nop + +.global _start +.type _start,@function _start: .Lfunc_gep0: @@ -81,7 +69,7 @@ _start: addi 2, 2, .TOC.-.Lfunc_gep0@l .Lfunc_lep0: .localentry _start, .Lfunc_lep0-.Lfunc_gep0 - bl foo - nop bl ifunc nop + bl ifunc2 + nop diff --git a/test/ELF/ppc64-local-dynamic.s b/test/ELF/ppc64-local-dynamic.s index 6ed3b0fd8f0..87e33b784b8 100644 --- a/test/ELF/ppc64-local-dynamic.s +++ b/test/ELF/ppc64-local-dynamic.s @@ -113,7 +113,7 @@ k: // Dis: test: // Dis: addis 3, 2, 0 // Dis-NEXT: addi 3, 3, -32760 -// Dis-NEXT: bl .+67108804 +// Dis-NEXT: bl .+60 // Dis-NEXT: ld 2, 24(1) // Dis-NEXT: addis 3, 3, 0 // Dis-NEXT: lwa 3, -32768(3) diff --git a/test/ELF/ppc64-local-entry.s b/test/ELF/ppc64-local-entry.s new file mode 100644 index 00000000000..2a2295169b9 --- /dev/null +++ b/test/ELF/ppc64-local-entry.s @@ -0,0 +1,47 @@ +# REQUIRES: ppc + +# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t +# RUN: ld.lld -r %t -o %t2 +# RUN: llvm-objdump -s -section=.symtab %t2 | FileCheck %s + +.text +.abiversion 2 +.globl _start +.p2align 2 +.type _start,@function + +_start: +.Lfunc_begin0: +.Lfunc_gep0: + addis 2, 12, .TOC.-.Lfunc_gep0@ha + addi 2, 2, .TOC.-.Lfunc_gep0@l +.Lfunc_lep0: + .localentry _start, .Lfunc_lep0-.Lfunc_gep0 + # The code below is not important, it just needs to access some + # global data or function, in order to use the TOC. + # In this case, it performs the following: + # g += 10; + # Also note that this code is not intended to be run, but only + # to check if the linker will preserve the localentry info. + addis 3, 2, g@toc@ha + addi 3, 3, g@toc@l + lwz 4, 0(3) + addi 4, 4, 10 + stw 4, 0(3) + blr + .long 0 + .quad 0 +.Lfunc_end0: + .size _start, .Lfunc_end0-.Lfunc_begin0 + + .type g,@object # @g + .lcomm g,4,4 + +// We expect the st_other byte to be 0x60: +// localentry = 011 (gep + 2 instructions), reserved = 000, +// visibility = 00 (STV_DEFAULT) +// Currently, llvm-objdump does not support displaying +// st_other's PPC64 specific flags, thus we check the +// result of the hexdump of .symtab section. + +// CHECK: 0070 00000000 00000000 00000009 12600001 diff --git a/test/ELF/ppc64-long-branch-init.s b/test/ELF/ppc64-long-branch-init.s new file mode 100644 index 00000000000..80b3919cc45 --- /dev/null +++ b/test/ELF/ppc64-long-branch-init.s @@ -0,0 +1,43 @@ +# REQUIRES: ppc + +# RUN: llvm-mc -filetype=obj -triple=powerpc64-pc-freebsd13.0 %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s + +## .init consists of sections from several object files. Sections other than the +## last one do not have a terminator. Check we do not create a long branch stub +## in the middle. +## We currently use thunk section spacing to ensure the stub is in the end. This +## is not foolproof but good enough to not break in practice. + +# CHECK: Disassembly of section .init: +# CHECK-LABEL: _init: +# CHECK: blr +# CHECK-EMPTY: +# CHECK-LABEL: __long_branch_foo: + +.globl foo +foo: + .space 0x2000000 + blr + +.section .init,"ax",@progbits,unique,0 +.globl _init +_init: + stdu 1, -48(1) + mflr 0 + std 0, 64(1) + +.section .init,"ax",@progbits,unique,1 + bl foo + nop + +.section .init,"ax",@progbits,unique,2 + bl foo + nop + +.section .init,"ax",@progbits,unique,3 + ld 1, 0(1) + ld 0, 16(1) + mtlr 0 + blr diff --git a/test/ELF/ppc64-plt-stub.s b/test/ELF/ppc64-plt-stub.s index a644f487b8b..bf3ac09fd51 100644 --- a/test/ELF/ppc64-plt-stub.s +++ b/test/ELF/ppc64-plt-stub.s @@ -4,16 +4,19 @@ // RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o // RUN: ld.lld -shared %t2.o -o %t2.so // RUN: ld.lld %t.o %t2.so -o %t -// RUN: llvm-objdump -d %t | FileCheck %s +// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s // RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o // RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o // RUN: ld.lld -shared %t2.o -o %t2.so // RUN: ld.lld %t.o %t2.so -o %t -// RUN: llvm-objdump -d %t | FileCheck %s +// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s // CHECK: Disassembly of section .text: -// CHECK-NEXT: __plt_foo: +// CHECK-NEXT: _start: +// CHECK: 10010008: bl .+16 + +// CHECK-LABEL: 0000000010010018 __plt_foo: // CHECK-NEXT: std 2, 24(1) // CHECK-NEXT: addis 12, 2, 0 // CHECK-NEXT: ld 12, 32560(12) @@ -21,8 +24,6 @@ // CHECK-NEXT: bctr -// CHECK: _start: -// CHECK: bl .+67108824 .text .abiversion 2 .globl _start diff --git a/test/ELF/ppc64-rel-calls.s b/test/ELF/ppc64-rel-calls.s index 4c79498dc56..8423eb43f21 100644 --- a/test/ELF/ppc64-rel-calls.s +++ b/test/ELF/ppc64-rel-calls.s @@ -30,9 +30,8 @@ bar: nop blr -# FIXME: The printing here is misleading, the branch offset here is negative. -# CHECK: 1001000c: {{.*}} bl .+67108852 +# CHECK: 1001000c: {{.*}} bl .-12 # CHECK: 10010010: {{.*}} nop -# CHECK: 10010014: {{.*}} bl .+67108844 +# CHECK: 10010014: {{.*}} bl .-20 # CHECK: 10010018: {{.*}} nop # CHECK: 1001001c: {{.*}} blr diff --git a/test/ELF/ppc64-toc-restore-recursive-call.s b/test/ELF/ppc64-toc-restore-recursive-call.s index 4bedcfecf38..756a058cc56 100644 --- a/test/ELF/ppc64-toc-restore-recursive-call.s +++ b/test/ELF/ppc64-toc-restore-recursive-call.s @@ -1,8 +1,8 @@ # REQUIRES: ppc -# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t1.o -# RUN: ld.lld -shared %t1.o -o %t -# RUN: llvm-objdump -d -r %t | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o +# RUN: ld.lld -shared %t.o -o %t.so +# RUN: llvm-objdump -d --no-show-raw-insn -r %t.so | FileCheck %s # For a recursive call that is interposable the linker calls the plt-stub rather # then calling the function directly. Since the call is through a plt stub and @@ -14,12 +14,11 @@ # for recursive calls as well as keeps the logic for recursive calls consistent # with non-recursive calls. -# CHECK-LABEL: __plt_recursive_func: -# CHECK-NEXT: 10000: -# CHECK-LABEL: recursive_func -# CHECK-NEXT: 10014: -# CHECK: 1003c: {{[0-9a-fA-F ]+}} bl .+67108804 -# CHECK-NEXT: ld 2, 24(1) +# CHECK-LABEL: 0000000000010000 recursive_func: +# CHECK: 10028: bl .+32 +# CHECK-NEXT: ld 2, 24(1) + +# CHECK-LABEL: 0000000000010048 __plt_recursive_func: .abiversion 2 .section ".text" diff --git a/test/ELF/ppc64-toc-restore.s b/test/ELF/ppc64-toc-restore.s index d9e06ca6e59..d65bef847a7 100644 --- a/test/ELF/ppc64-toc-restore.s +++ b/test/ELF/ppc64-toc-restore.s @@ -5,14 +5,14 @@ // RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-func.s -o %t3.o // RUN: ld.lld -shared %t2.o -o %t2.so // RUN: ld.lld %t.o %t2.so %t3.o -o %t -// RUN: llvm-objdump -d %t | FileCheck %s +// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s // RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o // RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o // RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-func.s -o %t3.o // RUN: ld.lld -shared %t2.o -o %t2.so // RUN: ld.lld %t.o %t2.so %t3.o -o %t -// RUN: llvm-objdump -d %t | FileCheck %s +// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s .text .abiversion 2 @@ -28,16 +28,11 @@ _start: bl foo nop bl bar_local - - -// CHECK: Disassembly of section .text: -// CHECK: _start: -// CHECK: 1001001c: {{.*}} bl .+67108836 -// CHECK-NOT: 10010020: {{.*}} nop -// CHECK: 10010020: {{.*}} ld 2, 24(1) -// CHECK: 10010024: {{.*}} bl .+67108848 -// CHECK-NOT: 10010028: {{.*}} nop -// CHECK-NOT: 10010028: {{.*}} ld 2, 24(1) +// CHECK-LABEL: _start: +// CHECK-NEXT: 10010008: bl .+64 +// CHECK-NEXT: 1001000c: ld 2, 24(1) +// CHECK-NEXT: 10010010: bl .-16 +// CHECK-EMPTY: # Calling a function in another object file which will have same # TOC base does not need a nop. If nop present, do not rewrite to @@ -47,26 +42,24 @@ _diff_object: bl foo_not_shared bl foo_not_shared nop - -// CHECK: _diff_object: -// CHECK-NEXT: 10010028: {{.*}} bl .+24 -// CHECK-NEXT: 1001002c: {{.*}} bl .+20 -// CHECK-NEXT: 10010030: {{.*}} nop +// CHECK-LABEL: _diff_object: +// CHECK-NEXT: 10010014: bl .+28 +// CHECK-NEXT: 10010018: bl .+24 +// CHECK-NEXT: 1001001c: nop # Branching to a local function does not need a nop .global noretbranch noretbranch: b bar_local -// CHECK: noretbranch: -// CHECK: 10010034: {{.*}} b .+67108832 -// CHECK-NOT: 10010038: {{.*}} nop -// CHECK-NOT: 1001003c: {{.*}} ld 2, 24(1) +// CHECK-LABEL: noretbranch: +// CHECK: 10010020: b .+67108832 +// CHECK-EMPTY: // This should come last to check the end-of-buffer condition. .global last last: bl foo nop -// CHECK: last: -// CHECK: 10010038: {{.*}} bl .+67108808 -// CHECK-NEXT: 1001003c: {{.*}} ld 2, 24(1) +// CHECK-LABEL: last: +// CHECK-NEXT: 10010024: bl .+36 +// CHECK-NEXT: 10010028: ld 2, 24(1) diff --git a/wasm/OutputSections.cpp b/wasm/OutputSections.cpp index 4123d63b746..6b7b18d4ca7 100644 --- a/wasm/OutputSections.cpp +++ b/wasm/OutputSections.cpp @@ -111,8 +111,8 @@ void CodeSection::writeTo(uint8_t *Buf) { memcpy(Buf, CodeSectionHeader.data(), CodeSectionHeader.size()); // Write code section bodies - parallelForEach(Functions, - [&](const InputChunk *Chunk) { Chunk->writeTo(Buf); }); + for (const InputChunk *Chunk : Functions) + Chunk->writeTo(Buf); } uint32_t CodeSection::numRelocations() const { @@ -176,7 +176,7 @@ void DataSection::writeTo(uint8_t *Buf) { // Write data section headers memcpy(Buf, DataSectionHeader.data(), DataSectionHeader.size()); - parallelForEach(Segments, [&](const OutputSegment *Segment) { + for (const OutputSegment *Segment : Segments) { // Write data segment header uint8_t *SegStart = Buf + Segment->SectionOffset; memcpy(SegStart, Segment->Header.data(), Segment->Header.size()); @@ -184,7 +184,7 @@ void DataSection::writeTo(uint8_t *Buf) { // Write segment data payload for (const InputChunk *Chunk : Segment->InputSegments) Chunk->writeTo(Buf); - }); + } } uint32_t DataSection::numRelocations() const { @@ -232,8 +232,8 @@ void CustomSection::writeTo(uint8_t *Buf) { Buf += NameData.size(); // Write custom sections payload - parallelForEach(InputSections, - [&](const InputSection *Section) { Section->writeTo(Buf); }); + for (const InputSection *Section : InputSections) + Section->writeTo(Buf); } uint32_t CustomSection::numRelocations() const { From 89da04f7e8a7bb6826b79d539b009bb657c84482 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Tue, 11 Jun 2019 18:17:38 +0000 Subject: [PATCH 7/7] Vendor import of lldb release_80 branch r363030: https://llvm.org/svn/llvm-project/lldb/branches/release_80@363030 --- .../NativePDB/globals-fundamental.cpp | 2 +- lit/helper/build.py | 12 +- lit/helper/toolchain.py | 3 +- lit/lit.site.cfg.py.in | 1 + lit/tools/lldb-mi/lit.local.cfg | 2 + .../NativeRegisterContextLinux_x86_64.cpp | 136 +++++++++++------- .../Linux/NativeRegisterContextLinux_x86_64.h | 3 +- 7 files changed, 99 insertions(+), 60 deletions(-) create mode 100644 lit/tools/lldb-mi/lit.local.cfg diff --git a/lit/SymbolFile/NativePDB/globals-fundamental.cpp b/lit/SymbolFile/NativePDB/globals-fundamental.cpp index 8891eddf668..b3d3e37fbeb 100644 --- a/lit/SymbolFile/NativePDB/globals-fundamental.cpp +++ b/lit/SymbolFile/NativePDB/globals-fundamental.cpp @@ -1,5 +1,5 @@ // clang-format off -// REQUIRES: lld +// REQUIRES: lld, python // Test that we can display tag types. // RUN: %build --compiler=clang-cl --nodefaultlib -o %t.exe -- %s diff --git a/lit/helper/build.py b/lit/helper/build.py index 26f321d709f..fd52b7db4b8 100755 --- a/lit/helper/build.py +++ b/lit/helper/build.py @@ -283,19 +283,17 @@ def __init__(self, toolchain_type, args): print('Using alternate compiler "{0}" to match selected target.'.format(self.compiler)) if self.mode == 'link' or self.mode == 'compile-and-link': - self.linker = self._find_linker('link') if toolchain_type == 'msvc' else self._find_linker('lld-link') + self.linker = self._find_linker('link') if toolchain_type == 'msvc' else self._find_linker('lld-link', args.tools_dir) if not self.linker: raise ValueError('Unable to find an appropriate linker.') self.compile_env, self.link_env = self._get_visual_studio_environment() - def _find_linker(self, name): - if sys.platform == 'win32': - name = name + '.exe' + def _find_linker(self, name, search_paths=[]): compiler_dir = os.path.dirname(self.compiler) - linker_path = os.path.join(compiler_dir, name) - if not os.path.exists(linker_path): - raise ValueError('Could not find \'{}\''.format(linker_path)) + linker_path = find_executable(name, [compiler_dir] + search_paths) + if linker_path is None: + raise ValueError('Could not find \'{}\''.format(name)) return linker_path def _get_vc_install_dir(self): diff --git a/lit/helper/toolchain.py b/lit/helper/toolchain.py index 938f343badc..11aa0bcf4e7 100644 --- a/lit/helper/toolchain.py +++ b/lit/helper/toolchain.py @@ -51,7 +51,8 @@ def use_lldb_substitutions(config): llvm_config.add_tool_substitutions(primary_tools, [config.lldb_tools_dir]) - if lldbmi.was_resolved: + # lldb-mi always fails without Python support + if lldbmi.was_resolved and not config.lldb_disable_python: config.available_features.add('lldb-mi') def _use_msvc_substitutions(config): diff --git a/lit/lit.site.cfg.py.in b/lit/lit.site.cfg.py.in index fbf88efcc2f..738b25d0931 100644 --- a/lit/lit.site.cfg.py.in +++ b/lit/lit.site.cfg.py.in @@ -17,6 +17,7 @@ config.python_executable = "@PYTHON_EXECUTABLE@" config.have_zlib = @LLVM_ENABLE_ZLIB@ config.host_triple = "@LLVM_HOST_TRIPLE@" config.lldb_bitness = 64 if @LLDB_IS_64_BITS@ else 32 +config.lldb_disable_python = @LLDB_DISABLE_PYTHON@ # Support substitution of the tools and libs dirs with user parameters. This is # used when we can't determine the tool dir at configuration time. diff --git a/lit/tools/lldb-mi/lit.local.cfg b/lit/tools/lldb-mi/lit.local.cfg new file mode 100644 index 00000000000..ff28e265b3f --- /dev/null +++ b/lit/tools/lldb-mi/lit.local.cfg @@ -0,0 +1,2 @@ +if not "lldb-mi" in config.available_features: + config.unsupported = True diff --git a/source/Plugins/Process/Linux/NativeRegisterContextLinux_x86_64.cpp b/source/Plugins/Process/Linux/NativeRegisterContextLinux_x86_64.cpp index 50bf29b094d..c7313e6f987 100755 --- a/source/Plugins/Process/Linux/NativeRegisterContextLinux_x86_64.cpp +++ b/source/Plugins/Process/Linux/NativeRegisterContextLinux_x86_64.cpp @@ -19,9 +19,26 @@ #include "Plugins/Process/Utility/RegisterContextLinux_i386.h" #include "Plugins/Process/Utility/RegisterContextLinux_x86_64.h" - +#include #include +// Newer toolchains define __get_cpuid_count in cpuid.h, but some +// older-but-still-supported ones (e.g. gcc 5.4.0) don't, so we +// define it locally here, following the definition in clang/lib/Headers. +static inline int get_cpuid_count(unsigned int __leaf, + unsigned int __subleaf, + unsigned int *__eax, unsigned int *__ebx, + unsigned int *__ecx, unsigned int *__edx) +{ + unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, 0); + + if (__max_leaf == 0 || __max_leaf < __leaf) + return 0; + + __cpuid_count(__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx); + return 1; +} + using namespace lldb_private; using namespace lldb_private::process_linux; @@ -268,12 +285,29 @@ CreateRegisterInfoInterface(const ArchSpec &target_arch) { } } +// Return the size of the XSTATE area supported on this cpu. It is necessary to +// allocate the full size of the area even if we do not use/recognise all of it +// because ptrace(PTRACE_SETREGSET, NT_X86_XSTATE) will refuse to write to it if +// we do not pass it a buffer of sufficient size. The size is always at least +// sizeof(FPR) so that the allocated buffer can be safely cast to FPR*. +static std::size_t GetXSTATESize() { + unsigned int eax, ebx, ecx, edx; + // First check whether the XSTATE are is supported at all. + if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx) || !(ecx & bit_XSAVE)) + return sizeof(FPR); + + // Then fetch the maximum size of the area. + if (!get_cpuid_count(0x0d, 0, &eax, &ebx, &ecx, &edx)) + return sizeof(FPR); + return std::max(ecx, sizeof(FPR)); +} + NativeRegisterContextLinux_x86_64::NativeRegisterContextLinux_x86_64( const ArchSpec &target_arch, NativeThreadProtocol &native_thread) : NativeRegisterContextLinux(native_thread, CreateRegisterInfoInterface(target_arch)), - m_xstate_type(XStateType::Invalid), m_fpr(), m_iovec(), m_ymm_set(), - m_mpx_set(), m_reg_info(), m_gpr_x86_64() { + m_xstate_type(XStateType::Invalid), m_ymm_set(), m_mpx_set(), + m_reg_info(), m_gpr_x86_64() { // Set up data about ranges of valid registers. switch (target_arch.GetMachine()) { case llvm::Triple::x86: @@ -329,14 +363,13 @@ NativeRegisterContextLinux_x86_64::NativeRegisterContextLinux_x86_64( break; } - // Initialize m_iovec to point to the buffer and buffer size using the - // conventions of Berkeley style UIO structures, as required by PTRACE - // extensions. - m_iovec.iov_base = &m_fpr; - m_iovec.iov_len = sizeof(m_fpr); + std::size_t xstate_size = GetXSTATESize(); + m_xstate.reset(static_cast(std::malloc(xstate_size))); + m_iovec.iov_base = m_xstate.get(); + m_iovec.iov_len = xstate_size; // Clear out the FPR state. - ::memset(&m_fpr, 0, sizeof(m_fpr)); + ::memset(m_xstate.get(), 0, xstate_size); // Store byte offset of fctrl (i.e. first register of FPR) const RegisterInfo *reg_info_fctrl = GetRegisterInfoByName("fctrl"); @@ -439,14 +472,17 @@ NativeRegisterContextLinux_x86_64::ReadRegister(const RegisterInfo *reg_info, if (byte_order != lldb::eByteOrderInvalid) { if (reg >= m_reg_info.first_st && reg <= m_reg_info.last_st) - reg_value.SetBytes(m_fpr.fxsave.stmm[reg - m_reg_info.first_st].bytes, - reg_info->byte_size, byte_order); + reg_value.SetBytes( + m_xstate->fxsave.stmm[reg - m_reg_info.first_st].bytes, + reg_info->byte_size, byte_order); if (reg >= m_reg_info.first_mm && reg <= m_reg_info.last_mm) - reg_value.SetBytes(m_fpr.fxsave.stmm[reg - m_reg_info.first_mm].bytes, - reg_info->byte_size, byte_order); + reg_value.SetBytes( + m_xstate->fxsave.stmm[reg - m_reg_info.first_mm].bytes, + reg_info->byte_size, byte_order); if (reg >= m_reg_info.first_xmm && reg <= m_reg_info.last_xmm) - reg_value.SetBytes(m_fpr.fxsave.xmm[reg - m_reg_info.first_xmm].bytes, - reg_info->byte_size, byte_order); + reg_value.SetBytes( + m_xstate->fxsave.xmm[reg - m_reg_info.first_xmm].bytes, + reg_info->byte_size, byte_order); if (reg >= m_reg_info.first_ymm && reg <= m_reg_info.last_ymm) { // Concatenate ymm using the register halves in xmm.bytes and // ymmh.bytes @@ -488,7 +524,7 @@ NativeRegisterContextLinux_x86_64::ReadRegister(const RegisterInfo *reg_info, return error; } - // Get pointer to m_fpr.fxsave variable and set the data from it. + // Get pointer to m_xstate->fxsave variable and set the data from it. // Byte offsets of all registers are calculated wrt 'UserArea' structure. // However, ReadFPR() reads fpu registers {using ptrace(PTRACE_GETFPREGS,..)} @@ -499,9 +535,9 @@ NativeRegisterContextLinux_x86_64::ReadRegister(const RegisterInfo *reg_info, // Since, FPR structure is also one of the member of UserArea structure. // byte_offset(fpu wrt FPR) = byte_offset(fpu wrt UserArea) - // byte_offset(fctrl wrt UserArea) - assert((reg_info->byte_offset - m_fctrl_offset_in_userarea) < sizeof(m_fpr)); - uint8_t *src = - (uint8_t *)&m_fpr + reg_info->byte_offset - m_fctrl_offset_in_userarea; + assert((reg_info->byte_offset - m_fctrl_offset_in_userarea) < sizeof(FPR)); + uint8_t *src = (uint8_t *)m_xstate.get() + reg_info->byte_offset - + m_fctrl_offset_in_userarea; switch (reg_info->byte_size) { case 1: reg_value.SetUInt8(*(uint8_t *)src); @@ -527,7 +563,7 @@ NativeRegisterContextLinux_x86_64::ReadRegister(const RegisterInfo *reg_info, void NativeRegisterContextLinux_x86_64::UpdateXSTATEforWrite( uint32_t reg_index) { - XSAVE_HDR::XFeature &xstate_bv = m_fpr.xsave.header.xstate_bv; + XSAVE_HDR::XFeature &xstate_bv = m_xstate->xsave.header.xstate_bv; if (IsFPR(reg_index)) { // IsFPR considers both %st and %xmm registers as floating point, but these // map to two features. Set both flags, just in case. @@ -559,15 +595,15 @@ Status NativeRegisterContextLinux_x86_64::WriteRegister( if (IsFPR(reg_index) || IsAVX(reg_index) || IsMPX(reg_index)) { if (reg_info->encoding == lldb::eEncodingVector) { if (reg_index >= m_reg_info.first_st && reg_index <= m_reg_info.last_st) - ::memcpy(m_fpr.fxsave.stmm[reg_index - m_reg_info.first_st].bytes, + ::memcpy(m_xstate->fxsave.stmm[reg_index - m_reg_info.first_st].bytes, reg_value.GetBytes(), reg_value.GetByteSize()); if (reg_index >= m_reg_info.first_mm && reg_index <= m_reg_info.last_mm) - ::memcpy(m_fpr.fxsave.stmm[reg_index - m_reg_info.first_mm].bytes, + ::memcpy(m_xstate->fxsave.stmm[reg_index - m_reg_info.first_mm].bytes, reg_value.GetBytes(), reg_value.GetByteSize()); if (reg_index >= m_reg_info.first_xmm && reg_index <= m_reg_info.last_xmm) - ::memcpy(m_fpr.fxsave.xmm[reg_index - m_reg_info.first_xmm].bytes, + ::memcpy(m_xstate->fxsave.xmm[reg_index - m_reg_info.first_xmm].bytes, reg_value.GetBytes(), reg_value.GetByteSize()); if (reg_index >= m_reg_info.first_ymm && @@ -596,7 +632,7 @@ Status NativeRegisterContextLinux_x86_64::WriteRegister( return Status("CopyMPXtoXSTATE() failed"); } } else { - // Get pointer to m_fpr.fxsave variable and set the data to it. + // Get pointer to m_xstate->fxsave variable and set the data to it. // Byte offsets of all registers are calculated wrt 'UserArea' structure. // However, WriteFPR() takes m_fpr (of type FPR structure) and writes @@ -608,8 +644,8 @@ Status NativeRegisterContextLinux_x86_64::WriteRegister( // byte_offset(fpu wrt FPR) = byte_offset(fpu wrt UserArea) - // byte_offset(fctrl wrt UserArea) assert((reg_info->byte_offset - m_fctrl_offset_in_userarea) < - sizeof(m_fpr)); - uint8_t *dst = (uint8_t *)&m_fpr + reg_info->byte_offset - + sizeof(FPR)); + uint8_t *dst = (uint8_t *)m_xstate.get() + reg_info->byte_offset - m_fctrl_offset_in_userarea; switch (reg_info->byte_size) { case 1: @@ -667,7 +703,7 @@ Status NativeRegisterContextLinux_x86_64::ReadAllRegisterValues( ::memcpy(dst, &m_gpr_x86_64, GetRegisterInfoInterface().GetGPRSize()); dst += GetRegisterInfoInterface().GetGPRSize(); if (m_xstate_type == XStateType::FXSAVE) - ::memcpy(dst, &m_fpr.fxsave, sizeof(m_fpr.fxsave)); + ::memcpy(dst, &m_xstate->fxsave, sizeof(m_xstate->fxsave)); else if (m_xstate_type == XStateType::XSAVE) { lldb::ByteOrder byte_order = GetByteOrder(); @@ -700,7 +736,7 @@ Status NativeRegisterContextLinux_x86_64::ReadAllRegisterValues( } } // Copy the extended register state including the assembled ymm registers. - ::memcpy(dst, &m_fpr, sizeof(m_fpr)); + ::memcpy(dst, m_xstate.get(), sizeof(FPR)); } else { assert(false && "how do we save the floating point registers?"); error.SetErrorString("unsure how to save the floating point registers"); @@ -758,9 +794,9 @@ Status NativeRegisterContextLinux_x86_64::WriteAllRegisterValues( src += GetRegisterInfoInterface().GetGPRSize(); if (m_xstate_type == XStateType::FXSAVE) - ::memcpy(&m_fpr.fxsave, src, sizeof(m_fpr.fxsave)); + ::memcpy(&m_xstate->fxsave, src, sizeof(m_xstate->fxsave)); else if (m_xstate_type == XStateType::XSAVE) - ::memcpy(&m_fpr.xsave, src, sizeof(m_fpr.xsave)); + ::memcpy(&m_xstate->xsave, src, sizeof(m_xstate->xsave)); error = WriteFPR(); if (error.Fail()) @@ -814,12 +850,12 @@ bool NativeRegisterContextLinux_x86_64::IsCPUFeatureAvailable( return true; case RegSet::avx: // Check if CPU has AVX and if there is kernel support, by // reading in the XCR0 area of XSAVE. - if ((m_fpr.xsave.i387.xcr0 & mask_XSTATE_AVX) == mask_XSTATE_AVX) + if ((m_xstate->xsave.i387.xcr0 & mask_XSTATE_AVX) == mask_XSTATE_AVX) return true; break; case RegSet::mpx: // Check if CPU has MPX and if there is kernel support, by // reading in the XCR0 area of XSAVE. - if ((m_fpr.xsave.i387.xcr0 & mask_XSTATE_MPX) == mask_XSTATE_MPX) + if ((m_xstate->xsave.i387.xcr0 & mask_XSTATE_MPX) == mask_XSTATE_MPX) return true; break; } @@ -856,10 +892,10 @@ Status NativeRegisterContextLinux_x86_64::WriteFPR() { switch (m_xstate_type) { case XStateType::FXSAVE: return WriteRegisterSet( - &m_iovec, sizeof(m_fpr.fxsave), + &m_iovec, sizeof(m_xstate->fxsave), fxsr_regset(GetRegisterInfoInterface().GetTargetArchitecture())); case XStateType::XSAVE: - return WriteRegisterSet(&m_iovec, sizeof(m_fpr.xsave), NT_X86_XSTATE); + return WriteRegisterSet(&m_iovec, sizeof(m_xstate->xsave), NT_X86_XSTATE); default: return Status("Unrecognized FPR type."); } @@ -879,11 +915,11 @@ bool NativeRegisterContextLinux_x86_64::CopyXSTATEtoYMM( if (byte_order == lldb::eByteOrderLittle) { ::memcpy(m_ymm_set.ymm[reg_index - m_reg_info.first_ymm].bytes, - m_fpr.fxsave.xmm[reg_index - m_reg_info.first_ymm].bytes, + m_xstate->fxsave.xmm[reg_index - m_reg_info.first_ymm].bytes, sizeof(XMMReg)); ::memcpy(m_ymm_set.ymm[reg_index - m_reg_info.first_ymm].bytes + sizeof(XMMReg), - m_fpr.xsave.ymmh[reg_index - m_reg_info.first_ymm].bytes, + m_xstate->xsave.ymmh[reg_index - m_reg_info.first_ymm].bytes, sizeof(YMMHReg)); return true; } @@ -891,10 +927,10 @@ bool NativeRegisterContextLinux_x86_64::CopyXSTATEtoYMM( if (byte_order == lldb::eByteOrderBig) { ::memcpy(m_ymm_set.ymm[reg_index - m_reg_info.first_ymm].bytes + sizeof(XMMReg), - m_fpr.fxsave.xmm[reg_index - m_reg_info.first_ymm].bytes, + m_xstate->fxsave.xmm[reg_index - m_reg_info.first_ymm].bytes, sizeof(XMMReg)); ::memcpy(m_ymm_set.ymm[reg_index - m_reg_info.first_ymm].bytes, - m_fpr.xsave.ymmh[reg_index - m_reg_info.first_ymm].bytes, + m_xstate->xsave.ymmh[reg_index - m_reg_info.first_ymm].bytes, sizeof(YMMHReg)); return true; } @@ -907,19 +943,19 @@ bool NativeRegisterContextLinux_x86_64::CopyYMMtoXSTATE( return false; if (byte_order == lldb::eByteOrderLittle) { - ::memcpy(m_fpr.fxsave.xmm[reg - m_reg_info.first_ymm].bytes, + ::memcpy(m_xstate->fxsave.xmm[reg - m_reg_info.first_ymm].bytes, m_ymm_set.ymm[reg - m_reg_info.first_ymm].bytes, sizeof(XMMReg)); - ::memcpy(m_fpr.xsave.ymmh[reg - m_reg_info.first_ymm].bytes, + ::memcpy(m_xstate->xsave.ymmh[reg - m_reg_info.first_ymm].bytes, m_ymm_set.ymm[reg - m_reg_info.first_ymm].bytes + sizeof(XMMReg), sizeof(YMMHReg)); return true; } if (byte_order == lldb::eByteOrderBig) { - ::memcpy(m_fpr.fxsave.xmm[reg - m_reg_info.first_ymm].bytes, + ::memcpy(m_xstate->fxsave.xmm[reg - m_reg_info.first_ymm].bytes, m_ymm_set.ymm[reg - m_reg_info.first_ymm].bytes + sizeof(XMMReg), sizeof(XMMReg)); - ::memcpy(m_fpr.xsave.ymmh[reg - m_reg_info.first_ymm].bytes, + ::memcpy(m_xstate->xsave.ymmh[reg - m_reg_info.first_ymm].bytes, m_ymm_set.ymm[reg - m_reg_info.first_ymm].bytes, sizeof(YMMHReg)); return true; } @@ -929,7 +965,7 @@ bool NativeRegisterContextLinux_x86_64::CopyYMMtoXSTATE( void *NativeRegisterContextLinux_x86_64::GetFPRBuffer() { switch (m_xstate_type) { case XStateType::FXSAVE: - return &m_fpr.fxsave; + return &m_xstate->fxsave; case XStateType::XSAVE: return &m_iovec; default: @@ -940,7 +976,7 @@ void *NativeRegisterContextLinux_x86_64::GetFPRBuffer() { size_t NativeRegisterContextLinux_x86_64::GetFPRSize() { switch (m_xstate_type) { case XStateType::FXSAVE: - return sizeof(m_fpr.fxsave); + return sizeof(m_xstate->fxsave); case XStateType::XSAVE: return sizeof(m_iovec); default: @@ -953,14 +989,14 @@ Status NativeRegisterContextLinux_x86_64::ReadFPR() { // Probe XSAVE and if it is not supported fall back to FXSAVE. if (m_xstate_type != XStateType::FXSAVE) { - error = ReadRegisterSet(&m_iovec, sizeof(m_fpr.xsave), NT_X86_XSTATE); + error = ReadRegisterSet(&m_iovec, sizeof(m_xstate->xsave), NT_X86_XSTATE); if (!error.Fail()) { m_xstate_type = XStateType::XSAVE; return error; } } error = ReadRegisterSet( - &m_iovec, sizeof(m_fpr.xsave), + &m_iovec, sizeof(m_xstate->xsave), fxsr_regset(GetRegisterInfoInterface().GetTargetArchitecture())); if (!error.Fail()) { m_xstate_type = XStateType::FXSAVE; @@ -982,11 +1018,11 @@ bool NativeRegisterContextLinux_x86_64::CopyXSTATEtoMPX(uint32_t reg) { if (reg >= m_reg_info.first_mpxr && reg <= m_reg_info.last_mpxr) { ::memcpy(m_mpx_set.mpxr[reg - m_reg_info.first_mpxr].bytes, - m_fpr.xsave.mpxr[reg - m_reg_info.first_mpxr].bytes, + m_xstate->xsave.mpxr[reg - m_reg_info.first_mpxr].bytes, sizeof(MPXReg)); } else { ::memcpy(m_mpx_set.mpxc[reg - m_reg_info.first_mpxc].bytes, - m_fpr.xsave.mpxc[reg - m_reg_info.first_mpxc].bytes, + m_xstate->xsave.mpxc[reg - m_reg_info.first_mpxc].bytes, sizeof(MPXCsr)); } return true; @@ -997,10 +1033,10 @@ bool NativeRegisterContextLinux_x86_64::CopyMPXtoXSTATE(uint32_t reg) { return false; if (reg >= m_reg_info.first_mpxr && reg <= m_reg_info.last_mpxr) { - ::memcpy(m_fpr.xsave.mpxr[reg - m_reg_info.first_mpxr].bytes, + ::memcpy(m_xstate->xsave.mpxr[reg - m_reg_info.first_mpxr].bytes, m_mpx_set.mpxr[reg - m_reg_info.first_mpxr].bytes, sizeof(MPXReg)); } else { - ::memcpy(m_fpr.xsave.mpxc[reg - m_reg_info.first_mpxc].bytes, + ::memcpy(m_xstate->xsave.mpxc[reg - m_reg_info.first_mpxc].bytes, m_mpx_set.mpxc[reg - m_reg_info.first_mpxc].bytes, sizeof(MPXCsr)); } return true; diff --git a/source/Plugins/Process/Linux/NativeRegisterContextLinux_x86_64.h b/source/Plugins/Process/Linux/NativeRegisterContextLinux_x86_64.h index 9dcf82f50a4..2970326306e 100644 --- a/source/Plugins/Process/Linux/NativeRegisterContextLinux_x86_64.h +++ b/source/Plugins/Process/Linux/NativeRegisterContextLinux_x86_64.h @@ -109,7 +109,8 @@ class NativeRegisterContextLinux_x86_64 : public NativeRegisterContextLinux { // Private member variables. mutable XStateType m_xstate_type; - FPR m_fpr; // Extended States Area, named FPR for historical reasons. + std::unique_ptr + m_xstate; // Extended States Area, named FPR for historical reasons. struct iovec m_iovec; YMM m_ymm_set; MPX m_mpx_set;