From 0d3acbd9c7a4ac24ab5a4dbb39672f8f034f0a15 Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Wed, 29 Jul 2015 20:53:05 +0200 Subject: [PATCH 1/4] PPCAnalyst: drop needless forward declarations --- Source/Core/Core/PowerPC/PPCAnalyst.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/PowerPC/PPCAnalyst.cpp index f08b0f4216..474afc0093 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/PowerPC/PPCAnalyst.cpp @@ -46,9 +46,6 @@ CodeBuffer::~CodeBuffer() delete[] codebuffer; } -void AnalyzeFunction2(Symbol &func); -u32 EvaluateBranchTarget(UGeckoInstruction instr, u32 pc); - #define INVALID_TARGET ((u32)-1) u32 EvaluateBranchTarget(UGeckoInstruction instr, u32 pc) From a3476415f6fa67e6f45a583e91940a0b29166ce2 Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Tue, 4 Aug 2015 21:15:35 +0200 Subject: [PATCH 2/4] JitRegCache: mark derived classes as final --- Source/Core/Core/PowerPC/Jit64/JitRegCache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.h b/Source/Core/Core/PowerPC/Jit64/JitRegCache.h index 05d80ce8f6..ca2838dab3 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.h +++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.h @@ -170,7 +170,7 @@ public: int NumFreeRegisters(); }; -class GPRRegCache : public RegCache +class GPRRegCache final : public RegCache { public: void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override; @@ -183,7 +183,7 @@ public: }; -class FPURegCache : public RegCache +class FPURegCache final : public RegCache { public: void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override; From f5a10bddeea0924eba8c070baa4f70c8ddb6e91b Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Tue, 4 Aug 2015 21:21:37 +0200 Subject: [PATCH 3/4] Jit64: use overloaded IsSimpleReg() where useful --- .../Core/Core/PowerPC/Jit64/JitRegCache.cpp | 2 +- .../Core/Core/PowerPC/JitCommon/Jit_Util.cpp | 20 +++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp index 04f212b568..c546f84002 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp @@ -289,7 +289,7 @@ void RegCache::BindToRegister(size_t i, bool doLoad, bool makeDirty) LoadRegister(i, xr); for (size_t j = 0; j < regs.size(); j++) { - if (i != j && regs[j].location.IsSimpleReg() && regs[j].location.GetSimpleReg() == xr) + if (i != j && regs[j].location.IsSimpleReg(xr)) { Crash(); } diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index 2731a28600..a4a8a6212c 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -476,7 +476,7 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address, B // fun tricks... if (jit->jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(address)) { - if (!arg.IsSimpleReg() || arg.GetSimpleReg() != RSCRATCH) + if (!arg.IsSimpleReg(RSCRATCH)) MOV(accessSize, R(RSCRATCH), arg); UnsafeWriteGatherPipe(accessSize); @@ -654,7 +654,7 @@ void EmuCodeBlock::ForceSinglePrecision(X64Reg output, const OpArg& input, bool MOVDDUP(output, R(output)); } } - else if (!input.IsSimpleReg() || input.GetSimpleReg() != output) + else if (!input.IsSimpleReg(output)) { if (duplicate) MOVDDUP(output, input); @@ -667,7 +667,7 @@ void EmuCodeBlock::ForceSinglePrecision(X64Reg output, const OpArg& input, bool void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&), void (XEmitter::*sseOp)(X64Reg, const OpArg&), X64Reg regOp, const OpArg& arg1, const OpArg& arg2, bool packed, bool reversible) { - if (arg1.IsSimpleReg() && regOp == arg1.GetSimpleReg()) + if (arg1.IsSimpleReg(regOp)) { (this->*sseOp)(regOp, arg2); } @@ -675,7 +675,7 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&), { (this->*avxOp)(regOp, arg1.GetSimpleReg(), arg2); } - else if (arg2.IsSimpleReg() && arg2.GetSimpleReg() == regOp) + else if (arg2.IsSimpleReg(regOp)) { if (reversible) { @@ -684,7 +684,7 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&), else { // The ugly case: regOp == arg2 without AVX, or with arg1 == memory - if (!arg1.IsSimpleReg() || arg1.GetSimpleReg() != XMM0) + if (!arg1.IsSimpleReg(XMM0)) MOVAPD(XMM0, arg1); if (cpu_info.bAVX) { @@ -714,7 +714,7 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&), void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&, u8), void (XEmitter::*sseOp)(X64Reg, const OpArg&, u8), X64Reg regOp, const OpArg& arg1, const OpArg& arg2, u8 imm) { - if (arg1.IsSimpleReg() && regOp == arg1.GetSimpleReg()) + if (arg1.IsSimpleReg(regOp)) { (this->*sseOp)(regOp, arg2, imm); } @@ -722,10 +722,10 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&, { (this->*avxOp)(regOp, arg1.GetSimpleReg(), arg2, imm); } - else if (arg2.IsSimpleReg() && arg2.GetSimpleReg() == regOp) + else if (arg2.IsSimpleReg(regOp)) { // The ugly case: regOp == arg2 without AVX, or with arg1 == memory - if (!arg1.IsSimpleReg() || arg1.GetSimpleReg() != XMM0) + if (!arg1.IsSimpleReg(XMM0)) MOVAPD(XMM0, arg1); if (cpu_info.bAVX) { @@ -764,14 +764,14 @@ void EmuCodeBlock::Force25BitPrecision(X64Reg output, const OpArg& input, X64Reg } else { - if (!input.IsSimpleReg() || input.GetSimpleReg() != output) + if (!input.IsSimpleReg(output)) MOVAPD(output, input); avx_op(&XEmitter::VPAND, &XEmitter::PAND, tmp, R(output), M(psRoundBit), true, true); PAND(output, M(psMantissaTruncate)); PADDQ(output, R(tmp)); } } - else if (!input.IsSimpleReg() || input.GetSimpleReg() != output) + else if (!input.IsSimpleReg(output)) { MOVAPD(output, input); } From 439fb26b9b7c123b6b94e63c2eb38059f237ce18 Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Tue, 4 Aug 2015 23:22:13 +0200 Subject: [PATCH 4/4] x64Emitter: add MOVSLDUP/MOVSHDUP --- Source/Core/Common/x64Emitter.cpp | 33 +++++++++++++++++++--- Source/Core/Common/x64Emitter.h | 5 +++- Source/UnitTests/Common/x64EmitterTest.cpp | 2 ++ 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp index 1c1a8b9754..eac841d162 100644 --- a/Source/Core/Common/x64Emitter.cpp +++ b/Source/Core/Common/x64Emitter.cpp @@ -1636,22 +1636,47 @@ void XEmitter::MOVMSKPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x50, d void XEmitter::LDDQU(X64Reg dest, const OpArg& arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only -// THESE TWO ARE UNTESTED. void XEmitter::UNPCKLPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x14, dest, arg);} void XEmitter::UNPCKHPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x15, dest, arg);} - void XEmitter::UNPCKLPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x14, dest, arg);} void XEmitter::UNPCKHPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x15, dest, arg);} +// Pretty much every x86 CPU nowadays supports SSE3, +// but the SSE2 fallbacks are easy. +void XEmitter::MOVSLDUP(X64Reg regOp, const OpArg& arg) +{ + if (cpu_info.bSSE3) + { + WriteSSEOp(0xF3, 0x12, regOp, arg); + } + else + { + if (!arg.IsSimpleReg(regOp)) + MOVAPD(regOp, arg); + UNPCKLPS(regOp, R(regOp)); + } +} +void XEmitter::MOVSHDUP(X64Reg regOp, const OpArg& arg) +{ + if (cpu_info.bSSE3) + { + WriteSSEOp(0xF3, 0x16, regOp, arg); + } + else + { + if (!arg.IsSimpleReg(regOp)) + MOVAPD(regOp, arg); + UNPCKHPS(regOp, R(regOp)); + } +} void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg) { if (cpu_info.bSSE3) { - WriteSSEOp(0xF2, 0x12, regOp, arg); //SSE3 movddup + WriteSSEOp(0xF2, 0x12, regOp, arg); } else { - // Simulate this instruction with SSE2 instructions if (!arg.IsSimpleReg(regOp)) MOVSD(regOp, arg); UNPCKLPD(regOp, R(regOp)); diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h index f6faea44bd..3232cafd55 100644 --- a/Source/Core/Common/x64Emitter.h +++ b/Source/Core/Common/x64Emitter.h @@ -581,9 +581,12 @@ public: void SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle); void SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle); - // SSE/SSE2: Useful alternative to shuffle in some cases. + // SSE3 + void MOVSLDUP(X64Reg regOp, const OpArg& arg); + void MOVSHDUP(X64Reg regOp, const OpArg& arg); void MOVDDUP(X64Reg regOp, const OpArg& arg); + // SSE/SSE2: Useful alternative to shuffle in some cases. void UNPCKLPS(X64Reg dest, const OpArg& src); void UNPCKHPS(X64Reg dest, const OpArg& src); void UNPCKLPD(X64Reg dest, const OpArg& src); diff --git a/Source/UnitTests/Common/x64EmitterTest.cpp b/Source/UnitTests/Common/x64EmitterTest.cpp index 766fc0436c..b6fe971546 100644 --- a/Source/UnitTests/Common/x64EmitterTest.cpp +++ b/Source/UnitTests/Common/x64EmitterTest.cpp @@ -721,6 +721,8 @@ TWO_OP_SSE_TEST(ANDNPD, "dqword") TWO_OP_SSE_TEST(ORPD, "dqword") TWO_OP_SSE_TEST(XORPD, "dqword") +TWO_OP_SSE_TEST(MOVSLDUP, "dqword") +TWO_OP_SSE_TEST(MOVSHDUP, "dqword") TWO_OP_SSE_TEST(MOVDDUP, "qword") TWO_OP_SSE_TEST(UNPCKLPS, "dqword")