JitArm64: track register usage per instruction and assert stable single/double state

Summary of the hunks below:
- Jit.cpp: DoJit calls gpr.UpdateLastUsed() (skipped for opcodes 46/47, which the
  in-code comment identifies as lmw/stmw) and fpr.UpdateLastUsed().
- JitArm64_RegCache.{h,cpp}: adds Arm64RegCache::UpdateLastUsed(), replaces the
  "all registers locked" warning in GetReg() with an assertion, and removes the
  default RegType argument from the R()/RW() declarations.
- JitArm64_FloatingPoint.cpp / JitArm64_Paired.cpp: adds ASSERT_MSG checks that
  compare previously computed single-precision flags against fresh
  fpr.IsSingle() queries; fctiwzx now passes RegType::LowerPair to RW() explicitly.

NOTE(review): line breaks, indentation and blank context lines were reconstructed
from the hunk line counts; check the context against the target tree before
applying. The `template` context line in the JitArm64_RegCache.h hunk looks
truncated (no parameter list) - TODO confirm.
NOTE(review): relative to the original patch, fselx now tests the cached
`a_single` instead of calling fpr.IsSingle(a, true) a second time, and
UpdateLastUsed() gained a header comment; the `index` lines were left untouched.

diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
index 4a22d1c964..8555f2b5f2 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
@@ -694,6 +694,15 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
     if (!SConfig::GetInstance().bEnableDebugging)
       js.downcountAmount += PatchEngine::GetSpeedhackCycles(js.compilerPC);
 
+    // Skip calling UpdateLastUsed for lmw/stmw - it usually hurts more than it helps
+    if (op.inst.OPCD != 46 && op.inst.OPCD != 47)
+      gpr.UpdateLastUsed(op.regsIn | op.regsOut);
+
+    BitSet32 fpr_used = op.fregsIn;
+    if (op.fregOut >= 0)
+      fpr_used[op.fregOut] = true;
+    fpr.UpdateLastUsed(fpr_used);
+
     // Gather pipe writes using a non-immediate address are discovered by profiling.
     bool gatherPipeIntCheck = js.fifoWriteAddresses.find(op.address) != js.fifoWriteAddresses.end();
 
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
index 8a4eddaf5e..08e3d2ab9b 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
@@ -32,8 +32,11 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
   bool use_c = op5 >= 25;  // fmul and all kind of fmaddXX
   bool use_b = op5 != 25;  // fmul uses no B
 
-  bool inputs_are_singles = fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) &&
-                            (!use_c || fpr.IsSingle(c, !packed));
+  const auto inputs_are_singles_func = [&] {
+    return fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) &&
+           (!use_c || fpr.IsSingle(c, !packed));
+  };
+  const bool inputs_are_singles = inputs_are_singles_func();
 
   ARM64Reg VA{}, VB{}, VC{}, VD{};
 
@@ -117,6 +120,9 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
     }
   }
 
+  ASSERT_MSG(DYNA_REC, inputs_are_singles == inputs_are_singles_func(),
+             "Register allocation turned singles into doubles in the middle of fp_arith");
+
   if (single || packed)
     fpr.FixSinglePrecision(d);
 }
@@ -196,6 +202,9 @@ void JitArm64::fp_logic(UGeckoInstruction inst)
       break;
     }
   }
+
+  ASSERT_MSG(DYNA_REC, single == fpr.IsSingle(b, !packed),
+             "Register allocation turned singles into doubles in the middle of fp_logic");
 }
 
 void JitArm64::fselx(UGeckoInstruction inst)
@@ -209,6 +218,7 @@ void JitArm64::fselx(UGeckoInstruction inst)
   const u32 c = inst.FC;
   const u32 d = inst.FD;
 
-  if (fpr.IsSingle(a, true))
+  const bool a_single = fpr.IsSingle(a, true);
+  if (a_single)
   {
     const ARM64Reg VA = fpr.R(a, RegType::LowerPairSingle);
@@ -220,15 +230,20 @@ void JitArm64::fselx(UGeckoInstruction inst)
     m_float_emit.FCMPE(EncodeRegToDouble(VA));
   }
 
-  const bool single = fpr.IsSingle(b, true) && fpr.IsSingle(c, true);
-  const RegType type = single ? RegType::LowerPairSingle : RegType::LowerPair;
-  const auto reg_encoder = single ? EncodeRegToSingle : EncodeRegToDouble;
+  const bool b_and_c_singles = fpr.IsSingle(b, true) && fpr.IsSingle(c, true);
+  const RegType type = b_and_c_singles ? RegType::LowerPairSingle : RegType::LowerPair;
+  const auto reg_encoder = b_and_c_singles ? EncodeRegToSingle : EncodeRegToDouble;
 
   const ARM64Reg VB = fpr.R(b, type);
   const ARM64Reg VC = fpr.R(c, type);
   const ARM64Reg VD = fpr.RW(d, type);
 
   m_float_emit.FCSEL(reg_encoder(VD), reg_encoder(VC), reg_encoder(VB), CC_GE);
+
+  ASSERT_MSG(DYNA_REC,
+             a_single == fpr.IsSingle(a, true) &&
+                 b_and_c_singles == (fpr.IsSingle(b, true) && fpr.IsSingle(c, true)),
+             "Register allocation turned singles into doubles in the middle of fselx");
 }
 
 void JitArm64::frspx(UGeckoInstruction inst)
@@ -241,7 +256,8 @@ void JitArm64::frspx(UGeckoInstruction inst)
   const u32 b = inst.FB;
   const u32 d = inst.FD;
 
-  if (fpr.IsSingle(b, true))
+  const bool single = fpr.IsSingle(b, true);
+  if (single)
   {
     // Source is already in single precision, so no need to do anything but to copy to PSR1.
     const ARM64Reg VB = fpr.R(b, RegType::LowerPairSingle);
@@ -257,6 +273,9 @@ void JitArm64::frspx(UGeckoInstruction inst)
 
     m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB));
   }
+
+  ASSERT_MSG(DYNA_REC, b == d || single == fpr.IsSingle(b, true),
+             "Register allocation turned singles into doubles in the middle of frspx");
 }
 
 void JitArm64::fcmpX(UGeckoInstruction inst)
@@ -320,6 +339,9 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
     SetJumpTarget(continue3);
   }
   SetJumpTarget(continue1);
+
+  ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a, true) && fpr.IsSingle(b, true)),
+             "Register allocation turned singles into doubles in the middle of fcmpX");
 }
 
 void JitArm64::fctiwzx(UGeckoInstruction inst)
@@ -334,7 +356,7 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
   const bool single = fpr.IsSingle(b, true);
 
   const ARM64Reg VB = fpr.R(b, single ? RegType::LowerPairSingle : RegType::LowerPair);
-  const ARM64Reg VD = fpr.RW(d);
+  const ARM64Reg VD = fpr.RW(d, RegType::LowerPair);
 
   const ARM64Reg V0 = fpr.GetReg();
 
@@ -357,4 +379,7 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
   }
   m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0));
   fpr.Unlock(V0);
+
+  ASSERT_MSG(DYNA_REC, b == d || single == fpr.IsSingle(b, true),
+             "Register allocation turned singles into doubles in the middle of fctiwzx");
 }
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
index faa58e6ad0..00cd92c39b 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
@@ -66,6 +66,9 @@ void JitArm64::ps_mergeXX(UGeckoInstruction inst)
     ASSERT_MSG(DYNA_REC, 0, "ps_merge - invalid op");
     break;
   }
+
+  ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b)),
+             "Register allocation turned singles into doubles in the middle of ps_mergeXX");
 }
 
 void JitArm64::ps_mulsX(UGeckoInstruction inst)
@@ -92,6 +95,9 @@ void JitArm64::ps_mulsX(UGeckoInstruction inst)
 
   m_float_emit.FMUL(size, reg_encoder(VD), reg_encoder(VA), reg_encoder(VC), upper ? 1 : 0);
 
+  ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(c)),
+             "Register allocation turned singles into doubles in the middle of ps_mulsX");
+
   fpr.FixSinglePrecision(d);
 }
 
@@ -250,6 +256,10 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst)
     ASSERT_MSG(DYNA_REC, 0, "ps_madd - invalid op");
     break;
   }
+
+  ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
+             "Register allocation turned singles into doubles in the middle of ps_maddXX");
+
   fpr.FixSinglePrecision(d);
 
   if (V0Q != INVALID_REG)
@@ -291,6 +301,9 @@ void JitArm64::ps_sel(UGeckoInstruction inst)
     m_float_emit.MOV(VD, V0);
     fpr.Unlock(V0Q);
   }
+
+  ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
+             "Register allocation turned singles into doubles in the middle of ps_sel");
 }
 
 void JitArm64::ps_sumX(UGeckoInstruction inst)
@@ -330,6 +343,9 @@ void JitArm64::ps_sumX(UGeckoInstruction inst)
     m_float_emit.INS(size, VD, upper ? 1 : 0, V0, upper ? 1 : 0);
   }
 
+  ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
+             "Register allocation turned singles into doubles in the middle of ps_sumX");
+
   fpr.FixSinglePrecision(d);
 
   fpr.Unlock(V0);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
index 4a3f255c71..d0d861d992 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
@@ -41,10 +41,25 @@ ARM64Reg Arm64RegCache::GetReg()
   // Holy cow, how did you run out of registers?
   // We can't return anything reasonable in this case. Return INVALID_REG and watch the failure
   // happen
-  WARN_LOG_FMT(DYNA_REC, "All available registers are locked dumb dumb");
+  ASSERT_MSG(DYNA_REC, 0, "All available registers are locked!");
   return INVALID_REG;
 }
 
+// Calls ResetLastUsed() on each guest register whose bit is set in regs_used and
+// IncrementLastUsed() on every other one. Entries at index 32 or above are never
+// looked up in regs_used and are therefore always incremented.
+void Arm64RegCache::UpdateLastUsed(BitSet32 regs_used)
+{
+  for (size_t i = 0; i < m_guest_registers.size(); ++i)
+  {
+    OpArg& reg = m_guest_registers[i];
+    if (i < 32 && regs_used[i])
+      reg.ResetLastUsed();
+    else
+      reg.IncrementLastUsed();
+  }
+}
+
 u32 Arm64RegCache::GetUnlockedRegisterCount() const
 {
   u32 unlocked_registers = 0;
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
index b074001830..55c86bd4ab 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
@@ -140,6 +140,8 @@ public:
   // Requires unlocking after done
   Arm64Gen::ARM64Reg GetReg();
 
+  void UpdateLastUsed(BitSet32 regs_used);
+
   // Locks a register so a cache cannot use it
   // Useful for function calls
   template
@@ -281,9 +283,9 @@ public:
 
   // Returns a guest register inside of a host register
   // Will dump an immediate to the host register as well
-  Arm64Gen::ARM64Reg R(size_t preg, RegType type = RegType::LowerPair);
+  Arm64Gen::ARM64Reg R(size_t preg, RegType type);
 
-  Arm64Gen::ARM64Reg RW(size_t preg, RegType type = RegType::LowerPair);
+  Arm64Gen::ARM64Reg RW(size_t preg, RegType type);
 
   BitSet32 GetCallerSavedUsed() const override;
 