From b0d271db3c7e79e3cf27131393d7a490f5e122d1 Mon Sep 17 00:00:00 2001 From: skidau Date: Sat, 2 Jun 2012 10:06:47 +1000 Subject: [PATCH] Changed MOVDDUP to use MOVSD on non-SSE3 CPUs. Added DMA wait time under DSP HLE mode. Fixes Knockout Kings 2003. --- Source/Core/Common/Src/x64Emitter.cpp | 2 +- Source/Core/Core/Src/HW/DSP.cpp | 10 ++++++---- .../Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp | 15 +++++++++++++-- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/Source/Core/Common/Src/x64Emitter.cpp b/Source/Core/Common/Src/x64Emitter.cpp index 00c58a0fbc..d4bd1adabf 100644 --- a/Source/Core/Common/Src/x64Emitter.cpp +++ b/Source/Core/Common/Src/x64Emitter.cpp @@ -1321,7 +1321,7 @@ void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg) { // Simulate this instruction with SSE2 instructions if (!arg.IsSimpleReg(regOp)) - MOVQ_xmm(regOp, arg); // MOVSD better? + MOVSD(regOp, arg); UNPCKLPD(regOp, R(regOp)); } } diff --git a/Source/Core/Core/Src/HW/DSP.cpp b/Source/Core/Core/Src/HW/DSP.cpp index 0eb19b006f..f2509555ed 100644 --- a/Source/Core/Core/Src/HW/DSP.cpp +++ b/Source/Core/Core/Src/HW/DSP.cpp @@ -436,8 +436,9 @@ void Write16(const u16 _Value, const u32 _Address) if (tmpControl.ARAM) g_dspState.DSPControl.ARAM = 0; if (tmpControl.DSP) g_dspState.DSPControl.DSP = 0; - // g_ARAM (line below should be commented out to emulate the DMA wait time) - g_dspState.DSPControl.DMAState = 0; // keep g_ARAM DMA State zero + // Tracking DMAState fixes Knockout Kings 2003 in DSP HLE mode + if (GetDSPEmulator()->IsLLE()) + g_dspState.DSPControl.DMAState = 0; // keep g_ARAM DMA State zero // unknown g_dspState.DSPControl.unk3 = tmpControl.unk3; @@ -700,8 +701,9 @@ void Do_ARAM_DMA() // seems like a good estimate CoreTiming::ScheduleEvent_Threadsafe(g_arDMA.Cnt.count >> 1, et_GenerateDSPInterrupt, INT_ARAM | (1<<16)); - // Uncomment the line below to emulate the DMA wait time. 
- //g_dspState.DSPControl.DMAState = 1; + // Emulating the DMA wait time fixes Knockout Kings 2003 in DSP HLE mode + if (!GetDSPEmulator()->IsLLE()) + g_dspState.DSPControl.DMAState = 1; // Real hardware DMAs in 32byte chunks, but we can get by with 8byte chunks if (g_arDMA.Cnt.dir) diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp index 5fcd2f9e29..fd197d0e84 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -24,6 +24,7 @@ #include "Jit.h" #include "JitRegCache.h" +#include "CPUDetect.h" const u64 GC_ALIGNED16(psSignBits2[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL}; const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL}; @@ -64,9 +65,19 @@ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEm (this->*op)(XMM0, Gen::R(XMM1)); MOVSD(fpr.RX(d), Gen::R(XMM0)); } - if (dupe) { + if (dupe) + { ForceSinglePrecisionS(fpr.RX(d)); - MOVDDUP(fpr.RX(d), fpr.R(d)); + if (cpu_info.bSSE3) + { + MOVDDUP(fpr.RX(d), fpr.R(d)); + } + else + { + if (!fpr.R(d).IsSimpleReg(fpr.RX(d))) + MOVQ_xmm(fpr.RX(d), fpr.R(d)); + UNPCKLPD(fpr.RX(d), R(fpr.RX(d))); + } } fpr.UnlockAll(); }