diff --git a/Source/Core/Core/Src/Core.cpp b/Source/Core/Core/Src/Core.cpp
index 2c3fba85b6..bb7245489e 100644
--- a/Source/Core/Core/Src/Core.cpp
+++ b/Source/Core/Core/Src/Core.cpp
@@ -184,14 +184,6 @@ bool Init()
 	g_CoreStartupParameter = _CoreParameter;
 
-	// TODO: Reenable JIT instructions
-	if (g_CoreStartupParameter.bMMU)
-	{
-		g_CoreStartupParameter.bJITLoadStoreOff = true;
-		g_CoreStartupParameter.bJITLoadStorePairedOff = true;
-		g_CoreStartupParameter.bJITLoadStoreFloatingOff = true;
-	}
-
 	// FIXME DEBUG_LOG(BOOT, dump_params());
 	Host_SetWaitCursor(true);
diff --git a/Source/Core/Core/Src/HW/MemmapFunctions.cpp b/Source/Core/Core/Src/HW/MemmapFunctions.cpp
index 47fef6020c..267609e2b6 100644
--- a/Source/Core/Core/Src/HW/MemmapFunctions.cpp
+++ b/Source/Core/Core/Src/HW/MemmapFunctions.cpp
@@ -147,7 +147,7 @@ u32 EFB_Read(const u32 addr)
 }
 
 template <class T>
-inline void ReadFromHardware(T &_var, u32 em_address, u32 effective_address, Memory::XCheckTLBFlag flag)
+inline void ReadFromHardware(T &_var, const u32 em_address, const u32 effective_address, Memory::XCheckTLBFlag flag)
 {
 	// TODO: Figure out the fastest order of tests for both read and write (they are probably different).
 	if ((em_address & 0xC8000000) == 0xC8000000)
diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp
index d62c5ae5a6..93185aa413 100644
--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp
@@ -394,8 +394,7 @@ void dcbtst(UGeckoInstruction _inst)
 void dcbz(UGeckoInstruction _inst)
 {
 	// HACK but works... we think
-	if (!Core::g_CoreStartupParameter.bMMU)
-		Memory::Memset(Helper_Get_EA_X(_inst) & (~31), 0, 32); // Breaks Rogue Leader, fixes Super Mario Sunshine
+	Memory::Memset(Helper_Get_EA_X(_inst) & (~31), 0, 32);
 }
 
 // eciwx/ecowx technically should access the specified device
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp
index 28b01e979f..22e2a7a28c 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp
@@ -194,7 +194,7 @@ void Jit64::Init()
 #else
 	jo.enableFastMem = false;
 #endif
-	jo.assumeFPLoadFromMem = true;
+	jo.assumeFPLoadFromMem = Core::g_CoreStartupParameter.bUseFastMem;
 	jo.fpAccurateFcmp = true; // Fallback to Interpreter
 	jo.optimizeGatherPipe = true;
 	jo.fastInterrupts = false;
@@ -575,6 +575,10 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
 
 			if (js.memcheck && (opinfo->flags & FL_LOADSTORE))
 			{
+				// In case we are about to jump to the dispatcher, flush regs
+				gpr.Flush(FLUSH_ALL);
+				fpr.Flush(FLUSH_ALL);
+
 				TEST(32, M(&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_DSI));
 				FixupBranch noMemException = J_CC(CC_Z);
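Note: the load/store hunks that follow all wrap the final register writeback in a MEMCHECK_START/MEMCHECK_END pair. The macros' expansion is not part of this patch; the sketch below is a minimal C++ model of the intended behaviour, inferred from the TEST of ppcState.Exceptions emitted in DoJit above. The struct, constant value, and function name are illustrative assumptions, not Dolphin source.

#include <cstdint>

struct PowerPCState { uint32_t Exceptions; uint32_t gpr[32]; };
static const uint32_t EXCEPTION_DSI = 0x1; // assumed flag value, for illustration only

// Model of a memchecked load: the destination register is written only when
// the guarded access raised no DSI, mirroring what the JIT emits between
// MEMCHECK_START and MEMCHECK_END.
static bool CommitLoadResult(PowerPCState &ppc, int rd, uint32_t loaded)
{
	if (ppc.Exceptions & EXCEPTION_DSI)
		return false;     // skip writeback; the block exit takes the exception path
	ppc.gpr[rd] = loaded; // normal path: commit the result
	return true;
}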
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp
index 3ce0e12ef3..1aa20ce9fd 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp
@@ -38,51 +38,27 @@ void Jit64::lbzx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStore)
+	if (Core::g_CoreStartupParameter.bJITLoadStorelbzxOff) { Default(inst); return; }
 
 	int a = inst.RA, b = inst.RB, d = inst.RD;
-	gpr.Lock(a, b, d);
 	gpr.FlushLockX(ABI_PARAM1);
-	if (b == d || a == d)
-		gpr.LoadToX64(d, true, true);
-	else
-		gpr.LoadToX64(d, false, true);
 	MOV(32, R(ABI_PARAM1), gpr.R(b));
 	if (a)
+	{
 		ADD(32, R(ABI_PARAM1), gpr.R(a));
-#if 0
+	}
+
 	SafeLoadRegToEAX(ABI_PARAM1, 8, 0);
-	MOV(32, gpr.R(d), R(EAX));
-#else
-	UnsafeLoadRegToReg(ABI_PARAM1, gpr.RX(d), 8, 0, false);
-#endif
-	gpr.UnlockAll();
-	gpr.UnlockAllX();
-}
-
-void Jit64::lwzx(UGeckoInstruction inst)
-{
-	INSTRUCTION_START
-	JITDISABLE(LoadStore)
+	MEMCHECK_START
 
-	int a = inst.RA, b = inst.RB, d = inst.RD;
-	gpr.Lock(a, b, d);
-	gpr.FlushLockX(ABI_PARAM1);
-	if (b == d || a == d)
-		gpr.LoadToX64(d, true, true);
-	else
-		gpr.LoadToX64(d, false, true);
-	MOV(32, R(ABI_PARAM1), gpr.R(b));
-	if (a)
-		ADD(32, R(ABI_PARAM1), gpr.R(a));
-#if 1
-	SafeLoadRegToEAX(ABI_PARAM1, 32, 0);
+	gpr.KillImmediate(d);
 	MOV(32, gpr.R(d), R(EAX));
-#else
-	UnsafeLoadRegToReg(ABI_PARAM1, gpr.RX(d), 32, 0, false);
-#endif
-	gpr.UnlockAll();
+
+	MEMCHECK_END
+
 	gpr.UnlockAllX();
 }
 
@@ -92,21 +68,48 @@ void Jit64::lhax(UGeckoInstruction inst)
 	JITDISABLE(LoadStore)
 
 	int a = inst.RA, b = inst.RB, d = inst.RD;
-	gpr.Lock(a, b, d);
 	gpr.FlushLockX(ABI_PARAM1);
-	if (b == d || a == d)
-		gpr.LoadToX64(d, true, true);
-	else
-		gpr.LoadToX64(d, false, true);
 	MOV(32, R(ABI_PARAM1), gpr.R(b));
 	if (a)
+	{
 		ADD(32, R(ABI_PARAM1), gpr.R(a));
+	}
 
 	// Some homebrew actually loads from a hw reg with this instruction
 	SafeLoadRegToEAX(ABI_PARAM1, 16, 0, true);
+
+	MEMCHECK_START
+
+	gpr.KillImmediate(d);
 	MOV(32, gpr.R(d), R(EAX));
-	gpr.UnlockAll();
 
+	MEMCHECK_END
+
+	gpr.UnlockAllX();
+}
+
+void Jit64::lwzx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(LoadStore)
+
+	int a = inst.RA, b = inst.RB, d = inst.RD;
+	gpr.FlushLockX(ABI_PARAM1);
+	MOV(32, R(ABI_PARAM1), gpr.R(b));
+	if (a)
+	{
+		ADD(32, R(ABI_PARAM1), gpr.R(a));
+	}
+
+	SafeLoadRegToEAX(ABI_PARAM1, 32, 0);
+
+	MEMCHECK_START
+
+	gpr.KillImmediate(d);
+	MOV(32, gpr.R(d), R(EAX));
+
+	MEMCHECK_END
+
 	gpr.UnlockAllX();
 }
 
@@ -114,6 +117,7 @@ void Jit64::lXz(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStore)
+	if (Core::g_CoreStartupParameter.bJITLoadStorelXzOff)
 	{ Default(inst); return; }
 
@@ -135,7 +139,7 @@ void Jit64::lXz(UGeckoInstruction inst)
 	{
 		// TODO(LinesPrower):
 		// - Rewrite this!
-		// It seems to be ugly and unefficient, but I don't know JIT stuff enough to make it right
+		// It seems to be ugly and inefficient, but I don't know JIT stuff enough to make it right
 		// It only demonstrates the idea
 
 		// do our job at first
@@ -178,6 +182,7 @@ void Jit64::lXz(UGeckoInstruction inst)
 		Default(inst);
 		return;
 	}
+
 	int accessSize;
 	switch (inst.OPCD)
 	{
@@ -193,40 +198,32 @@ void Jit64::lXz(UGeckoInstruction inst)
 		return;
 	}
 
-	//Still here? Do regular path.
-#if defined(_M_X64)
-	if (accessSize == 8 || accessSize == 16 || !jo.enableFastMem) {
-#else
-	if (true) {
-#endif
-		// Safe and boring
+	if (accessSize == 32 && jo.enableFastMem && !Core::g_CoreStartupParameter.bMMU)
+	{
+		// Fast and daring
+		gpr.Lock(a, d);
+		gpr.LoadToX64(a, true, false);
+		gpr.LoadToX64(d, a == d, true);
+		MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset));
+		BSWAP(32, gpr.R(d).GetSimpleReg());
+		gpr.UnlockAll();
+		gpr.Flush(FLUSH_ALL);
+	}
+	else
+	{
 		gpr.FlushLockX(ABI_PARAM1);
-		gpr.Lock(d, a);
 		MOV(32, R(ABI_PARAM1), gpr.R(a));
 		SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset);
-		gpr.LoadToX64(d, false, true);
-		MOV(32, gpr.R(d), R(EAX));
-		gpr.UnlockAll();
-		gpr.UnlockAllX();
-		return;
-	}
+
+		MEMCHECK_START
 
-	// Fast and daring
-	gpr.Lock(a, d);
-	gpr.LoadToX64(a, true, false);
-	gpr.LoadToX64(d, a == d, true);
-	MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset));
-	switch (accessSize) {
-	case 32:
-		BSWAP(32, gpr.R(d).GetSimpleReg());
-		break;
-	// Careful in the backpatch - need to properly nop over first
-	// case 16:
-	//	BSWAP(32, gpr.R(d).GetSimpleReg());
-	//	SHR(32, gpr.R(d), Imm8(16));
-	//	break;
+		gpr.KillImmediate(d);
+		MOV(32, gpr.R(d), R(EAX));
+
+		MEMCHECK_END
+
+		gpr.UnlockAllX();
 	}
-	gpr.UnlockAll();
 }
 
 void Jit64::lha(UGeckoInstruction inst)
@@ -239,14 +236,17 @@ void Jit64::lha(UGeckoInstruction inst)
 	s32 offset = (s32)(s16)inst.SIMM_16;
 	// Safe and boring
 	gpr.FlushLockX(ABI_PARAM1);
-	gpr.Lock(d, a);
 	MOV(32, R(ABI_PARAM1), gpr.R(a));
 	SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true);
-	gpr.LoadToX64(d, d == a, true);
+
+	MEMCHECK_START
+
+	gpr.KillImmediate(d);
 	MOV(32, gpr.R(d), R(EAX));
-	gpr.UnlockAll();
+
+	MEMCHECK_END
+
 	gpr.UnlockAllX();
-	return;
 }
 
 void Jit64::lwzux(UGeckoInstruction inst)
@@ -260,17 +260,20 @@ void Jit64::lwzux(UGeckoInstruction inst)
 		Default(inst);
 		return;
 	}
-	gpr.Lock(a, b, d);
-
-	gpr.LoadToX64(d, b == d, true);
+	gpr.Lock(a);
 	gpr.LoadToX64(a, true, true);
 	ADD(32, gpr.R(a), gpr.R(b));
 	MOV(32, R(EAX), gpr.R(a));
 	SafeLoadRegToEAX(EAX, 32, 0, false);
+
+	MEMCHECK_START
+
+	gpr.KillImmediate(d);
 	MOV(32, gpr.R(d), R(EAX));
 
+	MEMCHECK_END
+
 	gpr.UnlockAll();
-	return;
 }
 
 // Zero cache line.
@@ -348,7 +351,7 @@ void Jit64::stX(UGeckoInstruction inst)
 			gpr.SetImmediate32(a, addr);
 			MOV(accessSize, R(EAX), gpr.R(s));
 			BSWAP(accessSize, EAX);
-			WriteToConstRamAddress(accessSize, R(EAX), addr);
+			WriteToConstRamAddress(accessSize, R(EAX), addr);
 			return;
 		}
 		// Other IO not worth the trouble.
@@ -387,35 +390,23 @@ void Jit64::stX(UGeckoInstruction inst)
 #endif*/
 
 	//Still here? Do regular path.
-	gpr.Lock(s, a);
-	gpr.FlushLockX(ECX, EDX);
-	MOV(32, R(EDX), gpr.R(a));
-	MOV(32, R(ECX), gpr.R(s));
-	if (offset)
-		ADD(32, R(EDX), Imm32((u32)offset));
+
+	gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
+	gpr.Lock(a);
+	MOV(32, R(ABI_PARAM2), gpr.R(a));
+	MOV(32, R(ABI_PARAM1), gpr.R(s));
+	SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, accessSize, offset);
+
 	if (update && offset)
 	{
-		gpr.LoadToX64(a, true, true);
-		MOV(32, gpr.R(a), R(EDX));
+		MEMCHECK_START
+
+		gpr.KillImmediate(a);
+		MOV(32, gpr.R(a), R(ABI_PARAM2));
+
+		MEMCHECK_END
 	}
-	TEST(32, R(EDX), Imm32(0x0C000000));
-	FixupBranch unsafe_addr = J_CC(CC_NZ);
-	BSWAP(accessSize, ECX);
-#ifdef _M_X64
-	MOV(accessSize, MComplex(RBX, EDX, SCALE_1, 0), R(ECX));
-#else
-	AND(32, R(EDX), Imm32(Memory::MEMVIEW32_MASK));
-	MOV(accessSize, MDisp(EDX, (u32)Memory::base), R(ECX));
-#endif
-	FixupBranch skip_call = J();
-	SetJumpTarget(unsafe_addr);
-	switch (accessSize)
-	{
-	case 32: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ECX, EDX); break;
-	case 16: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U16, 2), ECX, EDX); break;
-	case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), ECX, EDX); break;
-	}
-	SetJumpTarget(skip_call);
+
 	gpr.UnlockAll();
 	gpr.UnlockAllX();
 }
@@ -459,9 +450,14 @@ void Jit64::stXx(UGeckoInstruction inst)
 	MOV(32, R(ECX), gpr.R(s));
 	SafeWriteRegToReg(ECX, EDX, accessSize, 0);
 
+	//MEMCHECK_START
+
+	// TODO: Insert rA update code here
+
+	//MEMCHECK_END
+
 	gpr.UnlockAll();
 	gpr.UnlockAllX();
-	return;
 }
 
 // A few games use these heavily in video codecs.
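Note: the stX rewrite above routes every store through SafeWriteRegToReg and performs the rA update afterwards, under the same memcheck guard. A small C++ sketch of that ordering (the CPU struct and tryStore callback are hypothetical stand-ins, not Dolphin source):

#include <cstdint>

struct CPU { uint32_t gpr[32]; };

// For "store with update" forms, the effective address may reach rA only if
// the store itself did not fault; a faulting store leaves rA untouched so
// the instruction can be restarted once the DSI has been handled.
static void StoreWithUpdate(CPU &cpu, int rs, int ra, int32_t offset,
                            bool (*tryStore)(uint32_t addr, uint32_t value))
{
	const uint32_t ea = cpu.gpr[ra] + offset;
	if (!tryStore(ea, cpu.gpr[rs]))
		return;           // MEMCHECK path: rA keeps its old value
	cpu.gpr[ra] = ea;     // success path: commit the update
}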
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
index 793163cca3..6ed2ea6d19 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
@@ -63,7 +63,6 @@ void Jit64::lfs(UGeckoInstruction inst)
 	}
 	s32 offset = (s32)(s16)inst.SIMM_16;
 	gpr.FlushLockX(ABI_PARAM1);
-	gpr.Lock(a);
 	MOV(32, R(ABI_PARAM1), gpr.R(a));
 	if (jo.assumeFPLoadFromMem)
 	{
@@ -74,12 +73,16 @@ void Jit64::lfs(UGeckoInstruction inst)
 		SafeLoadRegToEAX(ABI_PARAM1, 32, offset);
 	}
 
+	MEMCHECK_START
+
 	MOV(32, M(&temp32), R(EAX));
 	fpr.Lock(d);
 	fpr.LoadToX64(d, false);
 	CVTSS2SD(fpr.RX(d), M(&temp32));
 	MOVDDUP(fpr.RX(d), fpr.R(d));
-	gpr.UnlockAll();
+
+	MEMCHECK_END
+
 	gpr.UnlockAllX();
 	fpr.UnlockAll();
 }
@@ -90,6 +93,8 @@ void Jit64::lfd(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
 
+	if (js.memcheck) { Default(inst); return; }
+
 	int d = inst.RD;
 	int a = inst.RA;
 	if (!a)
@@ -119,18 +124,28 @@ void Jit64::lfd(UGeckoInstruction inst)
 	MOV(64, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
 	BSWAP(64, EAX);
 	MOV(64, M(&temp64), R(EAX));
+
+	MEMCHECK_START
+
 	MOVSD(XMM0, M(&temp64));
 	MOVSD(xd, R(XMM0));
+
+	MEMCHECK_END
 #else
 	AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
 	MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset));
 	BSWAP(32, EAX);
 	MOV(32, M((void*)((u8 *)&temp64+4)), R(EAX));
+
+	MEMCHECK_START
+
 	MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset + 4));
 	BSWAP(32, EAX);
 	MOV(32, M(&temp64), R(EAX));
 	MOVSD(XMM0, M(&temp64));
 	MOVSD(xd, R(XMM0));
+
+	MEMCHECK_END
+
 #if 0
 	// Alternate implementation; possibly faster
 	AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
@@ -156,6 +171,8 @@ void Jit64::stfd(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
 
+	if (js.memcheck) { Default(inst); return; }
+
 	int s = inst.RS;
 	int a = inst.RA;
 	if (!a)
@@ -207,18 +224,28 @@ void Jit64::stfd(UGeckoInstruction inst)
 #ifdef _M_X64
 	fpr.LoadToX64(s, true, false);
 	MOVSD(M(&temp64), fpr.RX(s));
+
+	MEMCHECK_START
+
 	MOV(64, R(EAX), M(&temp64));
 	BSWAP(64, EAX);
 	MOV(64, MComplex(RBX, ABI_PARAM1, SCALE_1, 0), R(EAX));
+
+	MEMCHECK_END
 #else
 	fpr.LoadToX64(s, true, false);
 	MOVSD(M(&temp64), fpr.RX(s));
+
+	MEMCHECK_START
+
 	MOV(32, R(EAX), M(&temp64));
 	BSWAP(32, EAX);
 	MOV(32, MDisp(ABI_PARAM1, (u32)Memory::base + 4), R(EAX));
 	MOV(32, R(EAX), M((void*)((u8 *)&temp64 + 4)));
 	BSWAP(32, EAX);
 	MOV(32, MDisp(ABI_PARAM1, (u32)Memory::base), R(EAX));
+
+	MEMCHECK_END
 #endif
 	}
 	SetJumpTarget(quit);
@@ -233,6 +260,8 @@ void Jit64::stfs(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
 
+	if (js.memcheck) { Default(inst); return; }
+
 	bool update = inst.OPCD & 1;
 	int s = inst.RS;
 	int a = inst.RA;
@@ -287,6 +316,8 @@ void Jit64::stfsx(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
 
+	if (js.memcheck) { Default(inst); return; }
+
 	// We can take a shortcut here - it's not likely that a hardware access would use this instruction.
 	gpr.FlushLockX(ABI_PARAM1);
 	fpr.Lock(inst.RS);
@@ -295,7 +326,8 @@ void Jit64::stfsx(UGeckoInstruction inst)
 	ADD(32, R(ABI_PARAM1), gpr.R(inst.RA));
 	CVTSD2SS(XMM0, fpr.R(inst.RS));
 	MOVD_xmm(R(EAX), XMM0);
-	UnsafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
+	SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
+
 	gpr.UnlockAllX();
 	fpr.UnlockAll();
 }
@@ -306,12 +338,14 @@ void Jit64::lfsx(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
 
-	fpr.Lock(inst.RS);
-	fpr.LoadToX64(inst.RS, false, true);
 	MOV(32, R(EAX), gpr.R(inst.RB));
 	if (inst.RA)
+	{
 		ADD(32, R(EAX), gpr.R(inst.RA));
-	if (cpu_info.bSSSE3) {
+	}
+	if (cpu_info.bSSSE3 && !js.memcheck) {
+		fpr.Lock(inst.RS);
+		fpr.LoadToX64(inst.RS, false, true);
 		X64Reg r = fpr.R(inst.RS).GetSimpleReg();
 #ifdef _M_IX86
 		AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
@@ -319,14 +353,25 @@ void Jit64::lfsx(UGeckoInstruction inst)
 #else
 		MOVD_xmm(r, MComplex(RBX, EAX, SCALE_1, 0));
 #endif
+		MEMCHECK_START
+
 		PSHUFB(r, M((void *)bswapShuffle1x4));
 		CVTSS2SD(r, R(r));
 		MOVDDUP(r, R(r));
+
+		MEMCHECK_END
 	} else {
-		UnsafeLoadRegToReg(EAX, EAX, 32, false);
+		SafeLoadRegToEAX(EAX, 32, false);
+
+		MEMCHECK_START
+
 		MOV(32, M(&temp32), R(EAX));
 		CVTSS2SD(XMM0, M(&temp32));
+		fpr.Lock(inst.RS);
+		fpr.LoadToX64(inst.RS, false, true);
 		MOVDDUP(fpr.R(inst.RS).GetSimpleReg(), R(XMM0));
+
+		MEMCHECK_END
 	}
 	fpr.UnlockAll();
 }
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp
index 9809d29872..edef0a4bf6 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp
@@ -54,6 +54,8 @@ void Jit64::psq_st(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(LoadStorePaired)
 
+	if (js.memcheck) { Default(inst); return; }
+
 	if (!inst.RA)
 	{
 		// TODO: Support these cases if it becomes necessary.
@@ -136,6 +138,8 @@ void Jit64::psq_l(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(LoadStorePaired)
 
+	if (js.memcheck) { Default(inst); return; }
+
 	if (!inst.RA)
 	{
 		Default(inst);
@@ -174,7 +178,13 @@ void Jit64::psq_l(UGeckoInstruction inst)
 	ABI_AlignStack(0);
 	CALLptr(MDisp(EDX, (u32)(u64)asm_routines.pairedLoadQuantized));
 	ABI_RestoreStack(0);
+
+//	MEMCHECK_START // FIXME: MMU does not work here because of unsafe memory access
+
 	CVTPS2PD(fpr.RX(inst.RS), R(XMM0));
+
+//	MEMCHECK_END
+
 	gpr.UnlockAll();
 	gpr.UnlockAllX();
 }
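Note: both JITs key their fast path off the same address test that appears in the IR_X86.cpp hunk below and again in the Jit_Util.cpp rewrite at the end of the patch: TEST(32, R(ECX), Imm32(0x0C000000)). A one-line C++ restatement of that check (the helper name is invented for illustration):

#include <cstdint>

// Addresses with bits 0x0C000000 clear are treated as plain RAM and accessed
// directly through the translated base pointer; anything else falls back to
// the safe Memory::Read_*/Write_* routines.
static inline bool IsFastPathAddress(uint32_t addr)
{
	return (addr & 0x0C000000) == 0;
}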
diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp
index c33feb2614..56fa5bdf83 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp
@@ -465,7 +465,6 @@ static OpArg regBuildMemAddress(RegInfo& RI, InstLoc I, InstLoc AI,
 }
 
 static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) {
-	bool win32 = false;
 	if (RI.UseProfile) {
 		unsigned curLoad = ProfiledLoads[RI.numProfiledLoads++];
 		if (!(curLoad & 0x0C000000)) {
@@ -486,15 +485,8 @@ static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) {
 	if (RI.MakeProfile) {
 		RI.Jit->MOV(32, M(&ProfiledLoads[RI.numProfiledLoads++]), R(ECX));
 	}
-#ifdef _M_IX86
-	win32 = true;
-#endif
-	FixupBranch argh;
-	if (!(win32 && SConfig::GetInstance().m_LocalCoreStartupParameter.iTLBHack == 1))
-	{
-		RI.Jit->TEST(32, R(ECX), Imm32(0x0C000000));
-		argh = RI.Jit->J_CC(CC_Z);
-	}
+	RI.Jit->TEST(32, R(ECX), Imm32(0x0C000000));
+	FixupBranch argh = RI.Jit->J_CC(CC_Z);
 	// Slow safe read using Memory::Read_Ux routines
 #ifdef _M_IX86 // we don't allocate EAX on x64 so no reason to save it.
@@ -514,14 +506,10 @@ static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) {
 		RI.Jit->POP(32, R(EAX));
 #endif
 	}
-	if (!(win32 && SConfig::GetInstance().m_LocalCoreStartupParameter.iTLBHack == 1))
-	{
-		FixupBranch arg2 = RI.Jit->J();
-		// Fast unsafe read using memory pointer EBX
-		RI.Jit->SetJumpTarget(argh);
-		RI.Jit->UnsafeLoadRegToReg(ECX, reg, Size, 0, false);
-		RI.Jit->SetJumpTarget(arg2);
-	}
+	FixupBranch arg2 = RI.Jit->J();
+	RI.Jit->SetJumpTarget(argh);
+	RI.Jit->UnsafeLoadRegToReg(ECX, reg, Size, 0, false);
+	RI.Jit->SetJumpTarget(arg2);
 	if (regReadUse(RI, I))
 		RI.regs[reg] = I;
 }
diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp
index f9d7679f23..057056cab7 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp
@@ -184,7 +184,7 @@ void JitIL::Init()
 #else
 	jo.enableFastMem = false;
 #endif
-	jo.assumeFPLoadFromMem = true;
+	jo.assumeFPLoadFromMem = Core::g_CoreStartupParameter.bUseFastMem;
 	jo.fpAccurateFcmp = false;
 	jo.optimizeGatherPipe = true;
 	jo.fastInterrupts = false;
diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStore.cpp
index fb98cd4f78..c973dec728 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStore.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStore.cpp
@@ -39,6 +39,7 @@ void JitIL::lhax(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStore)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
 	if (inst.RA)
 		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -50,7 +51,8 @@ void JitIL::lXz(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
-	JITDISABLE(LoadStore)
+	JITDISABLE(LoadStore)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16);
 	if (inst.RA)
 		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -81,6 +83,7 @@ void JitIL::lha(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStore)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitIntConst((s32)(s16)inst.SIMM_16);
 	if (inst.RA)
 		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -94,6 +97,7 @@ void JitIL::lXzx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStore)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
 	if (inst.RA) {
 		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -141,6 +145,7 @@ void JitIL::stX(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStore)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16), value = ibuild.EmitLoadGReg(inst.RS);
 	if (inst.RA)
@@ -160,6 +165,7 @@ void JitIL::stXx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStore)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB), value = ibuild.EmitLoadGReg(inst.RS);
 	addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -179,6 +185,7 @@ void JitIL::lmw(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStore)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16);
 	if (inst.RA)
 		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -194,6 +201,7 @@ void JitIL::stmw(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStore)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16);
 	if (inst.RA)
 		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStoreFloating.cpp
index 764fd709dc..e06d289cc4 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStoreFloating.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStoreFloating.cpp
@@ -43,6 +43,7 @@ void JitIL::lfs(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16), val;
 	if (inst.RA)
 		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -56,6 +57,7 @@ void JitIL::lfd(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16), val;
 	if (inst.RA)
 		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -70,6 +72,7 @@ void JitIL::stfd(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16), val = ibuild.EmitLoadFReg(inst.RS);
 	if (inst.RA)
@@ -85,6 +88,7 @@ void JitIL::stfs(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16), val = ibuild.EmitLoadFReg(inst.RS);
 	if (inst.RA)
@@ -101,6 +105,7 @@ void JitIL::stfsx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB), val = ibuild.EmitLoadFReg(inst.RS);
 	if (inst.RA)
@@ -115,6 +120,7 @@ void JitIL::lfsx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB), val;
 	if (inst.RA)
 		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStorePaired.cpp
index 9818ee8589..769bf71c2c 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStorePaired.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStorePaired.cpp
@@ -37,6 +37,7 @@ void JitIL::psq_st(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStorePaired)
+	if (js.memcheck) { Default(inst); return; }
 	if (inst.W) {Default(inst); return;}
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12), val;
 	if (inst.RA)
@@ -52,6 +53,7 @@ void JitIL::psq_l(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStorePaired)
+	if (js.memcheck) { Default(inst); return; }
 	if (inst.W) {Default(inst); return;}
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12), val;
 	if (inst.RA)
diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp
index e8a69ee903..d1d96ab513 100644
--- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp
+++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp
@@ -31,15 +31,9 @@
 
 using namespace Gen;
 
-void EmuCodeBlock::JitClearCA()
-{
-	AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
-}
-
-void EmuCodeBlock::JitSetCA()
-{
-	OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
-}
+static const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+static u32 GC_ALIGNED16(float_buffer);
 
 void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
 {
@@ -60,7 +54,9 @@ void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int acc
 			SAR(32, R(reg_value), Imm8(16));
 		else
 			SHR(32, R(reg_value), Imm8(16));
-	} else if (signExtend) {
+	}
+	else if (signExtend)
+	{
 		// TODO: bake 8-bit into the original load.
 		MOVSX(32, accessSize, reg_value, R(reg_value));
 	}
@@ -76,26 +72,53 @@ void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, i
 #endif
 }
 
-void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signExtend)
+void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg_addr, int accessSize, s32 offset, bool signExtend)
 {
-	if (offset)
-		ADD(32, R(reg), Imm32((u32)offset));
-	TEST(32, R(reg), Imm32(0x0C000000));
-	FixupBranch argh = J_CC(CC_Z);
-	switch (accessSize)
+	if (Core::g_CoreStartupParameter.bUseFastMem && accessSize == 32 && !Core::g_CoreStartupParameter.bMMU)
 	{
-	case 32: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), reg); break;
-	case 16: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), reg); break;
-	case 8: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), reg); break;
+		UnsafeLoadRegToReg(reg_addr, EAX, accessSize, offset, signExtend);
 	}
-	if (signExtend && accessSize < 32) {
-		// Need to sign extend values coming from the Read_U* functions.
-		MOVSX(32, accessSize, EAX, R(EAX));
+	else
+	{
+		if (offset)
+			ADD(32, R(reg_addr), Imm32((u32)offset));
+
+		FixupBranch addrf0;
+		FixupBranch addr20;
+		if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
+		{
+			CMP(32, R(reg_addr), Imm32(0xf0000000));
+			addrf0 = J_CC(CC_GE);
+			TEST(32, R(reg_addr), Imm32(0x20000000));
+			addr20 = J_CC(CC_NZ);
+		}
+
+		TEST(32, R(reg_addr), Imm32(0x0C000000));
+		FixupBranch fast = J_CC(CC_Z);
+
+		if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
+		{
+			SetJumpTarget(addr20);
+			SetJumpTarget(addrf0);
+		}
+
+		switch (accessSize)
+		{
+		case 32: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), reg_addr); break;
+		case 16: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), reg_addr); break;
+		case 8: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), reg_addr); break;
+		}
+		if (signExtend && accessSize < 32)
+		{
+			// Need to sign extend values coming from the Read_U* functions.
+			MOVSX(32, accessSize, EAX, R(EAX));
+		}
+
+		FixupBranch exit = J();
+		SetJumpTarget(fast);
+		UnsafeLoadRegToReg(reg_addr, EAX, accessSize, 0, signExtend);
+		SetJumpTarget(exit);
 	}
-	FixupBranch arg2 = J();
-	SetJumpTarget(argh);
-	UnsafeLoadRegToReg(reg, EAX, accessSize, 0, signExtend);
-	SetJumpTarget(arg2);
 }
 
 void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
@@ -116,23 +139,47 @@ void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int ac
 void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
 {
 	if (offset)
-		ADD(32, R(reg_addr), Imm32(offset));
-	TEST(32, R(reg_addr), Imm32(0x0C000000));
-	FixupBranch argh = J_CC(CC_Z);
+		ADD(32, R(reg_addr), Imm32((u32)offset));
+
+	// TODO: Figure out a cleaner way to check memory bounds
+	FixupBranch addrf0;
+	FixupBranch addr20;
+	FixupBranch fast;
+	if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
+	{
+		CMP(32, R(reg_addr), Imm32(0xf0000000));
+		addrf0 = J_CC(CC_GE);
+		TEST(32, R(reg_addr), Imm32(0x20000000));
+		addr20 = J_CC(CC_NZ);
+	}
+
+	if (!Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bUseFastMem)
+	{
+		TEST(32, R(reg_addr), Imm32(0x0C000000));
+		fast = J_CC(CC_Z);
+	}
+
+	if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
+	{
+		SetJumpTarget(addr20);
+		SetJumpTarget(addrf0);
+	}
+
 	switch (accessSize)
 	{
 	case 32: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), reg_value, reg_addr); break;
 	case 16: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), 2), reg_value, reg_addr); break;
 	case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), reg_value, reg_addr); break;
 	}
-	FixupBranch arg2 = J();
-	SetJumpTarget(argh);
-	UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
-	SetJumpTarget(arg2);
-}
+	FixupBranch exit = J();
 
-static const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
-static u32 GC_ALIGNED16(float_buffer);
+	if (!Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bUseFastMem)
+	{
+		SetJumpTarget(fast);
+	}
+	UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
+	SetJumpTarget(exit);
+}
 
 void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr)
 {
@@ -147,12 +194,12 @@ void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr)
 		FixupBranch arg2 = J();
 		SetJumpTarget(argh);
 		PSHUFB(xmm_value, M((void *)pbswapShuffle1x4));
-		#ifdef _M_IX86
+#ifdef _M_IX86
 		AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
 		MOVD_xmm(MDisp(reg_addr, (u32)Memory::base), xmm_value);
-		#else
+#else
 		MOVD_xmm(MComplex(RBX, reg_addr, SCALE_1, 0), xmm_value);
-		#endif
+#endif
 		SetJumpTarget(arg2);
 	} else {
 		MOVSS(M(&float_buffer), xmm_value);
@@ -197,3 +244,13 @@ void EmuCodeBlock::ForceSinglePrecisionP(X64Reg xmm) {
 		CVTPS2PD(xmm, R(xmm));
 	}
 }
+
+void EmuCodeBlock::JitClearCA()
+{
+	AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
+}
+
+void EmuCodeBlock::JitSetCA()
+{
+	OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
+}
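Note: taken together, the SafeLoadRegToEAX and SafeWriteRegToReg rewrites emit up to three range checks before picking an access path. The following C++ model restates that decision under stated assumptions: the enum and function are hypothetical, and the first check mirrors CMP/J_CC(CC_GE), which is a signed comparison, so it also routes addresses below 0x80000000 to the safe fallback.

#include <cstdint>

enum class AccessPath { FastRam, SafeFallback };

static AccessPath ClassifyAccess(uint32_t addr, bool mmu, bool tlbHack, bool fastMem)
{
	if (mmu || tlbHack)
	{
		// CMP(32, reg, Imm32(0xf0000000)); J_CC(CC_GE) -- signed compare, so
		// both [0xf0000000, 0xffffffff] and [0, 0x7fffffff] take the fallback.
		if ((int32_t)addr >= (int32_t)0xf0000000)
			return AccessPath::SafeFallback;
		// TEST(32, reg, Imm32(0x20000000)); J_CC(CC_NZ)
		if (addr & 0x20000000)
			return AccessPath::SafeFallback;
	}
	if (!mmu || fastMem)
	{
		// TEST(32, reg, Imm32(0x0C000000)); J_CC(CC_Z) -- direct RAM access
		if ((addr & 0x0C000000) == 0)
			return AccessPath::FastRam;
	}
	return AccessPath::SafeFallback; // Memory::Read_*/Write_* thunk
}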