From 2fad2a3e88c5fbd76d897fcfed362233d3936d12 Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 9 Dec 2013 22:10:18 +0100 Subject: [PATCH] Revert "Jit64: use AVX instructions in some places" and the fixes afterwards This reverts commit ff917897731d400a8e71c1480425733e651126ee. This reverts commit bcefa880e4d062121df9ab8b831170ec3a689a50. This reverts commit 710a0ff435c8593914c6eb074b6bc4d1985ae8c5. This reverts commit ddaf29e039d1736be1ca48c1c0b6af1c8d4ec47d. --- Source/Core/Core/Src/PowerPC/Jit64/Jit.h | 2 +- .../Src/PowerPC/Jit64/Jit_FloatingPoint.cpp | 112 ++++++------------ 2 files changed, 35 insertions(+), 79 deletions(-) diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index 139414a103..3fd9c41b61 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -119,7 +119,7 @@ public: void tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg)); typedef u32 (*Operation)(u32 a, u32 b); void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false); - void fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op_2)(Gen::X64Reg, Gen::OpArg), void (XEmitter::*op_3)(Gen::X64Reg, Gen::X64Reg, Gen::OpArg)); + void fp_tri_op(int d, int a, int b, bool reversible, bool single, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg)); // OPCODES void unknown_instruction(UGeckoInstruction _inst); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp index 7f77112ae9..e165ec1b55 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -13,61 +13,36 @@ static const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFF static const double GC_ALIGNED16(psOneOne2[2]) = {1.0, 1.0}; static const double one_const = 1.0f; -void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, - void (XEmitter::*op_2)(Gen::X64Reg, Gen::OpArg), - void (XEmitter::*op_3)(Gen::X64Reg, Gen::X64Reg, Gen::OpArg)) +void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg)) { - if (!cpu_info.bAVX) - { - op_3 = nullptr; - } - fpr.Lock(d, a, b); if (d == a) { - fpr.BindToRegister(d); - (this->*op_2)(fpr.RX(d), fpr.R(b)); + fpr.BindToRegister(d, true); + (this->*op)(fpr.RX(d), fpr.R(b)); } else if (d == b) { if (reversible) { - fpr.BindToRegister(d); - (this->*op_2)(fpr.RX(d), fpr.R(a)); + fpr.BindToRegister(d, true); + (this->*op)(fpr.RX(d), fpr.R(a)); } else { - if (op_3) - { - fpr.BindToRegister(d); - fpr.BindToRegister(a, true, false); - (this->*op_3)(fpr.RX(d), fpr.RX(a), fpr.R(b)); - } - else - { - MOVSD(XMM0, fpr.R(b)); - fpr.BindToRegister(d, false); - MOVSD(fpr.RX(d), fpr.R(a)); - (this->*op_2)(fpr.RX(d), Gen::R(XMM0)); - } + MOVSD(XMM0, fpr.R(b)); + fpr.BindToRegister(d, !single); + MOVSD(fpr.RX(d), fpr.R(a)); + (this->*op)(fpr.RX(d), Gen::R(XMM0)); } } else { - if (op_3) - { - fpr.BindToRegister(d, false); - fpr.BindToRegister(a); - (this->*op_3)(fpr.RX(d), fpr.RX(a), fpr.R(b)); - } - else - { - fpr.BindToRegister(d, false); - MOVSD(fpr.RX(d), fpr.R(a)); - (this->*op_2)(fpr.RX(d), fpr.R(b)); - } + // Sources different from d, can use rather quick solution + fpr.BindToRegister(d, !single); + MOVSD(fpr.RX(d), fpr.R(a)); + (this->*op)(fpr.RX(d), fpr.R(b)); } - if (single) { ForceSinglePrecisionS(fpr.RX(d)); @@ -101,10 +76,10 @@ void Jit64::fp_arith(UGeckoInstruction inst) bool single = inst.OPCD == 59; switch (inst.SUBOP5) { - case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::DIVSD, &XEmitter::VDIVSD); break; //div - case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::SUBSD, &XEmitter::VSUBSD); break; //sub - case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, single, &XEmitter::ADDSD, &XEmitter::VADDSD); break; //add - case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, single, &XEmitter::MULSD, &XEmitter::VMULSD); break; //mul + case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::DIVSD); break; //div + case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::SUBSD); break; //sub + case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, single, &XEmitter::ADDSD); break; //add + case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, single, &XEmitter::MULSD); break; //mul default: _assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!"); } @@ -112,24 +87,17 @@ void Jit64::fp_arith(UGeckoInstruction inst) void Jit64::frsqrtex(UGeckoInstruction inst) { - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff) - int d = inst.FD; - int b = inst.FB; - fpr.Lock(b, d); - fpr.BindToRegister(d, d == b, true); - MOVSD(XMM0, M((void *)&one_const)); - SQRTSD(XMM1, fpr.R(b)); - if (cpu_info.bAVX) - { - VDIVSD(fpr.RX(d), XMM0, R(XMM1)); - } - else - { - DIVSD(XMM0, R(XMM1)); - MOVSD(fpr.R(d), XMM0); - } - fpr.UnlockAll(); + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff) + int d = inst.FD; + int b = inst.FB; + fpr.Lock(b, d); + fpr.BindToRegister(d, true, true); + MOVSD(XMM0, M((void *)&one_const)); + SQRTSD(XMM1, fpr.R(b)); + DIVSD(XMM0, R(XMM1)); + MOVSD(fpr.R(d), XMM0); + fpr.UnlockAll(); } void Jit64::fmaddXX(UGeckoInstruction inst) @@ -224,28 +192,16 @@ void Jit64::fmrx(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITFloatingPointOff) - if (inst.Rc) - { + if (inst.Rc) { Default(inst); return; } int d = inst.FD; int b = inst.FB; - if (d != b) - { - fpr.Lock(b, d); - - // we don't need to load d, but if it already is, it must be marked as dirty - if (fpr.IsBound(d)) - { - fpr.BindToRegister(d); - } - fpr.BindToRegister(b, true, false); - - // caveat: the order of ModRM:r/m, ModRM:reg is deliberate! - // "MOVSD reg, mem" zeros out the upper half of the destination register - MOVSD(fpr.R(d), fpr.RX(b)); - fpr.UnlockAll(); - } + fpr.Lock(b, d); + fpr.BindToRegister(d, true, true); + MOVSD(XMM0, fpr.R(b)); + MOVSD(fpr.R(d), XMM0); + fpr.UnlockAll(); } void Jit64::fcmpx(UGeckoInstruction inst)