diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index efbc2bb759..192a0203a9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -215,7 +215,6 @@ private: // AsmRoutines void GenerateAsm(); void GenerateCommonAsm(); - void GenMfcr(); // Profiling void BeginTimeProfile(JitBlock* b); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 79ad36faab..2e55f7001b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -268,8 +268,8 @@ void JitArm64::fcmpX(UGeckoInstruction inst) ARM64Reg VA = reg_encoder(fpr.R(a, type)); ARM64Reg VB = reg_encoder(fpr.R(b, type)); - ARM64Reg WA = gpr.GetReg(); - ARM64Reg XA = EncodeRegTo64(WA); + gpr.BindCRToRegister(crf, false); + ARM64Reg XA = gpr.CR(crf); FixupBranch pNaN, pLesser, pGreater; FixupBranch continue1, continue2, continue3; @@ -312,10 +312,6 @@ void JitArm64::fcmpX(UGeckoInstruction inst) SetJumpTarget(continue3); } SetJumpTarget(continue1); - - STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf])); - - gpr.Unlock(WA); } void JitArm64::fctiwzx(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index bde2903917..fd38472059 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -17,33 +17,23 @@ using namespace Arm64Gen; void JitArm64::ComputeRC(ARM64Reg reg, int crf, bool needs_sext) { + gpr.BindCRToRegister(crf, false); if (needs_sext) { - ARM64Reg WA = gpr.GetReg(); - ARM64Reg XA = EncodeRegTo64(WA); - - SXTW(XA, reg); - - STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf])); - gpr.Unlock(WA); + SXTW(gpr.CR(crf), reg); } else { - STR(INDEX_UNSIGNED, EncodeRegTo64(reg), PPC_REG, PPCSTATE_OFF(cr_val[crf])); + MOV(gpr.CR(crf), EncodeRegTo64(reg)); } } void JitArm64::ComputeRC(u64 imm, int crf, bool needs_sext) { - ARM64Reg WA = gpr.GetReg(); - ARM64Reg XA = EncodeRegTo64(WA); - - MOVI2R(XA, imm); + gpr.BindCRToRegister(crf, false); + MOVI2R(gpr.CR(crf), imm); if (imm & 0x80000000 && needs_sext) - SXTW(XA, WA); - - STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf])); - gpr.Unlock(WA); + SXTW(gpr.CR(crf), DecodeReg(gpr.CR(crf))); } void JitArm64::ComputeCarry(bool Carry) @@ -425,19 +415,18 @@ void JitArm64::cmp(UGeckoInstruction inst) return; } + gpr.BindCRToRegister(crf, false); ARM64Reg WA = gpr.GetReg(); - ARM64Reg WB = gpr.GetReg(); ARM64Reg XA = EncodeRegTo64(WA); - ARM64Reg XB = EncodeRegTo64(WB); ARM64Reg RA = gpr.R(a); ARM64Reg RB = gpr.R(b); + ARM64Reg CR = gpr.CR(crf); + SXTW(XA, RA); - SXTW(XB, RB); + SXTW(CR, RB); + SUB(CR, XA, CR); - SUB(XA, XA, XB); - STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf])); - - gpr.Unlock(WA, WB); + gpr.Unlock(WA); } void JitArm64::cmpl(UGeckoInstruction inst) @@ -459,11 +448,8 @@ void JitArm64::cmpl(UGeckoInstruction inst) return; } - ARM64Reg WA = gpr.GetReg(); - ARM64Reg XA = EncodeRegTo64(WA); - SUB(XA, EncodeRegTo64(gpr.R(a)), EncodeRegTo64(gpr.R(b))); - STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf])); - gpr.Unlock(WA); + gpr.BindCRToRegister(crf, false); + SUB(gpr.CR(crf), EncodeRegTo64(gpr.R(a)), EncodeRegTo64(gpr.R(b))); } void JitArm64::cmpi(UGeckoInstruction inst) @@ -507,13 +493,10 @@ void JitArm64::cmpli(UGeckoInstruction inst) return; } - ARM64Reg WA = gpr.GetReg(); - ARM64Reg XA = EncodeRegTo64(WA); + gpr.BindCRToRegister(crf, false); + ARM64Reg XA = gpr.CR(crf); SUBI2R(XA, EncodeRegTo64(gpr.R(a)), inst.UIMM, XA); - - STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf])); - gpr.Unlock(WA); } void JitArm64::rlwinmx(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 5af4855a91..051b416f30 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -92,6 +92,15 @@ void Arm64RegCache::FlushMostStaleRegister() } // GPR Cache +constexpr size_t GUEST_GPR_COUNT = 32; +constexpr size_t GUEST_CR_COUNT = 8; +constexpr size_t GUEST_GPR_OFFSET = 0; +constexpr size_t GUEST_CR_OFFSET = GUEST_GPR_COUNT; + +Arm64GPRCache::Arm64GPRCache() : Arm64RegCache(GUEST_GPR_COUNT + GUEST_CR_COUNT) +{ +} + void Arm64GPRCache::Start(PPCAnalyst::BlockRegStats& stats) { } @@ -105,18 +114,48 @@ bool Arm64GPRCache::IsCalleeSaved(ARM64Reg reg) return std::find(callee_regs.begin(), callee_regs.end(), EncodeRegTo64(reg)) != callee_regs.end(); } -void Arm64GPRCache::FlushRegister(size_t preg, bool maintain_state) +const OpArg& Arm64GPRCache::GetGuestGPROpArg(size_t preg) const { - OpArg& reg = m_guest_registers[preg]; + _assert_(preg < GUEST_GPR_COUNT); + return m_guest_registers[preg]; +} + +Arm64GPRCache::GuestRegInfo Arm64GPRCache::GetGuestGPR(size_t preg) +{ + _assert_(preg < GUEST_GPR_COUNT); + return {32, PPCSTATE_OFF(gpr[preg]), m_guest_registers[GUEST_GPR_OFFSET + preg]}; +} + +Arm64GPRCache::GuestRegInfo Arm64GPRCache::GetGuestCR(size_t preg) +{ + _assert_(preg < GUEST_CR_COUNT); + return {64, PPCSTATE_OFF(cr_val[preg]), m_guest_registers[GUEST_CR_OFFSET + preg]}; +} + +Arm64GPRCache::GuestRegInfo Arm64GPRCache::GetGuestByIndex(size_t index) +{ + if (index >= GUEST_GPR_OFFSET && index < GUEST_GPR_OFFSET + GUEST_GPR_COUNT) + return GetGuestGPR(index - GUEST_GPR_OFFSET); + if (index >= GUEST_CR_OFFSET && index < GUEST_CR_OFFSET + GUEST_CR_COUNT) + return GetGuestCR(index - GUEST_CR_OFFSET); + _assert_msg_(DYNA_REC, false, "Invalid index for guest register"); +} + +void Arm64GPRCache::FlushRegister(size_t index, bool maintain_state) +{ + GuestRegInfo guest_reg = GetGuestByIndex(index); + OpArg& reg = guest_reg.reg; + size_t bitsize = guest_reg.bitsize; + if (reg.GetType() == REG_REG) { ARM64Reg host_reg = reg.GetReg(); if (reg.IsDirty()) - m_emit->STR(INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(gpr[preg])); + m_emit->STR(INDEX_UNSIGNED, host_reg, PPC_REG, guest_reg.ppc_offset); if (!maintain_state) { - UnlockRegister(host_reg); + UnlockRegister(DecodeReg(host_reg)); reg.Flush(); } } @@ -124,16 +163,16 @@ void Arm64GPRCache::FlushRegister(size_t preg, bool maintain_state) { if (!reg.GetImm()) { - m_emit->STR(INDEX_UNSIGNED, WSP, PPC_REG, PPCSTATE_OFF(gpr[preg])); + m_emit->STR(INDEX_UNSIGNED, bitsize == 64 ? ZR : WZR, PPC_REG, guest_reg.ppc_offset); } else { - ARM64Reg host_reg = GetReg(); + ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg()); m_emit->MOVI2R(host_reg, reg.GetImm()); - m_emit->STR(INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(gpr[preg])); + m_emit->STR(INDEX_UNSIGNED, host_reg, PPC_REG, guest_reg.ppc_offset); - UnlockRegister(host_reg); + UnlockRegister(DecodeReg(host_reg)); } if (!maintain_state) @@ -143,11 +182,11 @@ void Arm64GPRCache::FlushRegister(size_t preg, bool maintain_state) void Arm64GPRCache::FlushRegisters(BitSet32 regs, bool maintain_state) { - for (size_t i = 0; i < m_guest_registers.size(); ++i) + for (size_t i = 0; i < GUEST_GPR_COUNT; ++i) { if (regs[i]) { - if (i < 31 && regs[i + 1]) + if (i + 1 < GUEST_GPR_COUNT && regs[i + 1]) { // We've got two guest registers in a row to store OpArg& reg1 = m_guest_registers[i]; @@ -155,14 +194,14 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, bool maintain_state) if (reg1.IsDirty() && reg2.IsDirty() && reg1.GetType() == REG_REG && reg2.GetType() == REG_REG) { - ARM64Reg RX1 = R(i); - ARM64Reg RX2 = R(i + 1); - - m_emit->STP(INDEX_SIGNED, RX1, RX2, PPC_REG, PPCSTATE_OFF(gpr[0]) + i * sizeof(u32)); + size_t ppc_offset = GetGuestByIndex(i).ppc_offset; + ARM64Reg RX1 = R(GetGuestByIndex(i)); + ARM64Reg RX2 = R(GetGuestByIndex(i + 1)); + m_emit->STP(INDEX_SIGNED, RX1, RX2, PPC_REG, ppc_offset); if (!maintain_state) { - UnlockRegister(RX1); - UnlockRegister(RX2); + UnlockRegister(DecodeReg(RX1)); + UnlockRegister(DecodeReg(RX2)); reg1.Flush(); reg2.Flush(); } @@ -171,7 +210,18 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, bool maintain_state) } } - FlushRegister(i, maintain_state); + FlushRegister(GUEST_GPR_OFFSET + i, maintain_state); + } + } +} + +void Arm64GPRCache::FlushCRRegisters(BitSet32 regs, bool maintain_state) +{ + for (size_t i = 0; i < GUEST_CR_COUNT; ++i) + { + if (regs[i]) + { + FlushRegister(GUEST_CR_OFFSET + i, maintain_state); } } } @@ -179,7 +229,7 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, bool maintain_state) void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op) { BitSet32 to_flush; - for (size_t i = 0; i < m_guest_registers.size(); ++i) + for (size_t i = 0; i < GUEST_GPR_COUNT; ++i) { bool flush = true; if (m_guest_registers[i].GetType() == REG_REG) @@ -192,11 +242,14 @@ void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op) to_flush[i] = flush; } FlushRegisters(to_flush, mode == FLUSH_MAINTAIN_STATE); + FlushCRRegisters(BitSet32(~0U), mode == FLUSH_MAINTAIN_STATE); } -ARM64Reg Arm64GPRCache::R(size_t preg) +ARM64Reg Arm64GPRCache::R(const GuestRegInfo& guest_reg) { - OpArg& reg = m_guest_registers[preg]; + OpArg& reg = guest_reg.reg; + size_t bitsize = guest_reg.bitsize; + IncrementAllUsed(); reg.ResetLastUsed(); @@ -207,7 +260,7 @@ ARM64Reg Arm64GPRCache::R(size_t preg) break; case REG_IMM: // Is an immediate { - ARM64Reg host_reg = GetReg(); + ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg()); m_emit->MOVI2R(host_reg, reg.GetImm()); reg.Load(host_reg); reg.SetDirty(true); @@ -219,10 +272,10 @@ ARM64Reg Arm64GPRCache::R(size_t preg) // This is a bit annoying. We try to keep these preloaded as much as possible // This can also happen on cases where PPCAnalyst isn't feeing us proper register usage // statistics - ARM64Reg host_reg = GetReg(); + ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg()); reg.Load(host_reg); reg.SetDirty(false); - m_emit->LDR(INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(gpr[preg])); + m_emit->LDR(INDEX_UNSIGNED, host_reg, PPC_REG, guest_reg.ppc_offset); return host_reg; } break; @@ -234,27 +287,28 @@ ARM64Reg Arm64GPRCache::R(size_t preg) return INVALID_REG; } -void Arm64GPRCache::SetImmediate(size_t preg, u32 imm) +void Arm64GPRCache::SetImmediate(const GuestRegInfo& guest_reg, u32 imm) { - OpArg& reg = m_guest_registers[preg]; + OpArg& reg = guest_reg.reg; if (reg.GetType() == REG_REG) - UnlockRegister(reg.GetReg()); + UnlockRegister(DecodeReg(reg.GetReg())); reg.LoadToImm(imm); } -void Arm64GPRCache::BindToRegister(size_t preg, bool do_load) +void Arm64GPRCache::BindToRegister(const GuestRegInfo& guest_reg, bool do_load) { - OpArg& reg = m_guest_registers[preg]; + OpArg& reg = guest_reg.reg; + size_t bitsize = guest_reg.bitsize; reg.ResetLastUsed(); reg.SetDirty(true); if (reg.GetType() == REG_NOTLOADED) { - ARM64Reg host_reg = GetReg(); + ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg()); reg.Load(host_reg); if (do_load) - m_emit->LDR(INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(gpr[preg])); + m_emit->LDR(INDEX_UNSIGNED, host_reg, PPC_REG, guest_reg.ppc_offset); } } @@ -278,16 +332,17 @@ BitSet32 Arm64GPRCache::GetCallerSavedUsed() BitSet32 registers(0); for (auto& it : m_host_registers) if (it.IsLocked() && !IsCalleeSaved(it.GetReg())) - registers[it.GetReg()] = 1; + registers[DecodeReg(it.GetReg())] = 1; return registers; } void Arm64GPRCache::FlushByHost(ARM64Reg host_reg) { + host_reg = DecodeReg(host_reg); for (size_t i = 0; i < m_guest_registers.size(); ++i) { const OpArg& reg = m_guest_registers[i]; - if (reg.GetType() == REG_REG && reg.GetReg() == host_reg) + if (reg.GetType() == REG_REG && DecodeReg(reg.GetReg()) == host_reg) { FlushRegister(i, false); return; @@ -296,6 +351,12 @@ void Arm64GPRCache::FlushByHost(ARM64Reg host_reg) } // FPR Cache +constexpr size_t GUEST_FPR_COUNT = 32; + +Arm64FPRCache::Arm64FPRCache() : Arm64RegCache(GUEST_FPR_COUNT) +{ +} + void Arm64FPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op) { for (size_t i = 0; i < m_guest_registers.size(); ++i) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index f65eec4ad4..8e9ab476bb 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -4,7 +4,6 @@ #pragma once -#include #include #include #include @@ -118,7 +117,9 @@ private: class Arm64RegCache { public: - Arm64RegCache() : m_emit(nullptr), m_float_emit(nullptr), m_reg_stats(nullptr){}; + explicit Arm64RegCache(size_t guest_reg_count) + : m_emit(nullptr), m_float_emit(nullptr), m_guest_registers(guest_reg_count), + m_reg_stats(nullptr){}; virtual ~Arm64RegCache(){}; void Init(ARM64XEmitter* emitter); @@ -133,7 +134,6 @@ public: // Requires unlocking after done ARM64Reg GetReg(); - void StoreRegisters(BitSet32 regs) { FlushRegisters(regs, false); } // Locks a register so a cache cannot use it // Useful for function calls template @@ -176,8 +176,6 @@ protected: virtual void FlushRegister(size_t preg, bool maintain_state) = 0; - virtual void FlushRegisters(BitSet32 regs, bool maintain_state) = 0; - // Get available host registers u32 GetUnlockedRegisterCount(); @@ -197,9 +195,9 @@ protected: std::vector m_host_registers; // Our guest GPRs - // PowerPC has 32 GPRs + // PowerPC has 32 GPRs and 8 CRs // PowerPC also has 32 paired FPRs - std::array m_guest_registers; + std::vector m_guest_registers; // Register stats for the current block PPCAnalyst::BlockRegStats* m_reg_stats; @@ -208,27 +206,32 @@ protected: class Arm64GPRCache : public Arm64RegCache { public: + Arm64GPRCache(); ~Arm64GPRCache() {} void Start(PPCAnalyst::BlockRegStats& stats) override; // Flushes the register cache in different ways depending on the mode void Flush(FlushMode mode, PPCAnalyst::CodeOp* op = nullptr) override; - // Returns a guest register inside of a host register + // Returns a guest GPR inside of a host register // Will dump an immediate to the host register as well - ARM64Reg R(size_t preg); - - // Set a register to an immediate - void SetImmediate(size_t preg, u32 imm); - - // Returns if a register is set as an immediate - bool IsImm(size_t reg) const { return m_guest_registers[reg].GetType() == REG_IMM; } - // Gets the immediate that a register is set to - u32 GetImm(size_t reg) const { return m_guest_registers[reg].GetImm(); } - void BindToRegister(size_t preg, bool do_load); - + ARM64Reg R(size_t preg) { return R(GetGuestGPR(preg)); } + // Returns a guest CR inside of a host register + ARM64Reg CR(size_t preg) { return R(GetGuestCR(preg)); } + // Set a register to an immediate, only valid for guest GPRs + void SetImmediate(size_t preg, u32 imm) { SetImmediate(GetGuestGPR(preg), imm); } + // Returns if a register is set as an immediate, only valid for guest GPRs + bool IsImm(size_t preg) const { return GetGuestGPROpArg(preg).GetType() == REG_IMM; } + // Gets the immediate that a register is set to, only valid for guest GPRs + u32 GetImm(size_t preg) const { return GetGuestGPROpArg(preg).GetImm(); } + // Binds a guest GPR to a host register, optionally loading its value + void BindToRegister(size_t preg, bool do_load) { BindToRegister(GetGuestGPR(preg), do_load); } + // Binds a guest CR to a host register, optionally loading its value + void BindCRToRegister(size_t preg, bool do_load) { BindToRegister(GetGuestCR(preg), do_load); } BitSet32 GetCallerSavedUsed() override; + void StoreRegisters(BitSet32 regs) { FlushRegisters(regs, false); } + void StoreCRRegisters(BitSet32 regs) { FlushCRRegisters(regs, false); } protected: // Get the order of the host registers void GetAllocationOrder() override; @@ -236,17 +239,35 @@ protected: // Flushes a guest register by host provided void FlushByHost(ARM64Reg host_reg) override; - void FlushRegister(size_t preg, bool maintain_state) override; - - void FlushRegisters(BitSet32 regs, bool maintain_state) override; + void FlushRegister(size_t index, bool maintain_state) override; private: bool IsCalleeSaved(ARM64Reg reg); + + struct GuestRegInfo + { + size_t bitsize; + size_t ppc_offset; + OpArg& reg; + }; + + const OpArg& GetGuestGPROpArg(size_t preg) const; + GuestRegInfo GetGuestGPR(size_t preg); + GuestRegInfo GetGuestCR(size_t preg); + GuestRegInfo GetGuestByIndex(size_t index); + + ARM64Reg R(const GuestRegInfo& guest_reg); + void SetImmediate(const GuestRegInfo& guest_reg, u32 imm); + void BindToRegister(const GuestRegInfo& guest_reg, bool do_load); + + void FlushRegisters(BitSet32 regs, bool maintain_state); + void FlushCRRegisters(BitSet32 regs, bool maintain_state); }; class Arm64FPRCache : public Arm64RegCache { public: + Arm64FPRCache(); ~Arm64FPRCache() {} // Flushes the register cache in different ways depending on the mode void Flush(FlushMode mode, PPCAnalyst::CodeOp* op = nullptr) override; @@ -263,6 +284,7 @@ public: void FixSinglePrecision(size_t preg); + void StoreRegisters(BitSet32 regs) { FlushRegisters(regs, false); } protected: // Get the order of the host registers void GetAllocationOrder() override; @@ -272,8 +294,8 @@ protected: void FlushRegister(size_t preg, bool maintain_state) override; - void FlushRegisters(BitSet32 regs, bool maintain_state) override; - private: bool IsCalleeSaved(ARM64Reg reg); + + void FlushRegisters(BitSet32 regs, bool maintain_state); }; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 4d01d44b3f..0a06ac51a1 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -14,35 +14,23 @@ FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) { - ARM64Reg WA = gpr.GetReg(); - ARM64Reg XA = EncodeRegTo64(WA); + ARM64Reg XA = gpr.CR(field); + ARM64Reg WA = DecodeReg(XA); - FixupBranch branch; switch (bit) { case CR_SO_BIT: // check bit 61 set - LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field])); - branch = jump_if_set ? TBNZ(XA, 61) : TBZ(XA, 61); - break; + return jump_if_set ? TBNZ(XA, 61) : TBZ(XA, 61); case CR_EQ_BIT: // check bits 31-0 == 0 - LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(cr_val[field])); - branch = jump_if_set ? CBZ(WA) : CBNZ(WA); - break; + return jump_if_set ? CBZ(WA) : CBNZ(WA); case CR_GT_BIT: // check val > 0 - LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field])); CMP(XA, SP); - branch = B(jump_if_set ? CC_GT : CC_LE); - break; + return B(jump_if_set ? CC_GT : CC_LE); case CR_LT_BIT: // check bit 62 set - LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field])); - branch = jump_if_set ? TBNZ(XA, 62) : TBZ(XA, 62); - break; + return jump_if_set ? TBNZ(XA, 62) : TBZ(XA, 62); default: _assert_msg_(DYNA_REC, false, "Invalid CR bit"); } - - gpr.Unlock(WA); - return branch; } void JitArm64::mtmsr(UGeckoInstruction inst) @@ -79,11 +67,8 @@ void JitArm64::mcrf(UGeckoInstruction inst) if (inst.CRFS != inst.CRFD) { - ARM64Reg WA = gpr.GetReg(); - ARM64Reg XA = EncodeRegTo64(WA); - LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[inst.CRFS])); - STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[inst.CRFD])); - gpr.Unlock(WA); + gpr.BindCRToRegister(inst.CRFD, false); + MOV(gpr.CR(inst.CRFD), gpr.CR(inst.CRFS)); } } @@ -92,10 +77,11 @@ void JitArm64::mcrxr(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); + gpr.BindCRToRegister(inst.CRFD, false); ARM64Reg WA = gpr.GetReg(); ARM64Reg XA = EncodeRegTo64(WA); - ARM64Reg WB = gpr.GetReg(); - ARM64Reg XB = EncodeRegTo64(WB); + ARM64Reg XB = gpr.CR(inst.CRFD); + ARM64Reg WB = DecodeReg(XB); // Copy XER[0-3] into CR[inst.CRFD] LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); @@ -108,13 +94,12 @@ void JitArm64::mcrxr(UGeckoInstruction inst) MOVP2R(XB, m_crTable.data()); LDR(XB, XB, XA); - STR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val[inst.CRFD])); // Clear XER[0-3] STRB(INDEX_UNSIGNED, WZR, PPC_REG, PPCSTATE_OFF(xer_ca)); STRB(INDEX_UNSIGNED, WZR, PPC_REG, PPCSTATE_OFF(xer_so_ov)); - gpr.Unlock(WA, WB); + gpr.Unlock(WA); } void JitArm64::mfsr(UGeckoInstruction inst) @@ -435,9 +420,8 @@ void JitArm64::crXXX(UGeckoInstruction inst) int field = inst.CRBD >> 2; int bit = 3 - (inst.CRBD & 3); - ARM64Reg WA = gpr.GetReg(); - ARM64Reg XA = EncodeRegTo64(WA); - LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field])); + gpr.BindCRToRegister(field, true); + ARM64Reg XA = gpr.CR(field); switch (bit) { case CR_SO_BIT: @@ -456,8 +440,6 @@ void JitArm64::crXXX(UGeckoInstruction inst) AND(XA, XA, 64 - 63, 62, true); // XA & ~(1<<62) break; } - STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field])); - gpr.Unlock(WA); return; } @@ -468,9 +450,8 @@ void JitArm64::crXXX(UGeckoInstruction inst) int field = inst.CRBD >> 2; int bit = 3 - (inst.CRBD & 3); - ARM64Reg WA = gpr.GetReg(); - ARM64Reg XA = EncodeRegTo64(WA); - LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field])); + gpr.BindCRToRegister(field, true); + ARM64Reg XA = gpr.CR(field); if (bit != CR_GT_BIT) { @@ -502,9 +483,6 @@ void JitArm64::crXXX(UGeckoInstruction inst) } ORR(XA, XA, 32, 0, true); // XA | 1<<32 - - STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field])); - gpr.Unlock(WA); return; } @@ -527,9 +505,8 @@ void JitArm64::crXXX(UGeckoInstruction inst) ARM64Reg out = i ? XB : XA; bool negate = i ? negateB : negateA; - ARM64Reg WC = gpr.GetReg(); - ARM64Reg XC = EncodeRegTo64(WC); - LDR(INDEX_UNSIGNED, XC, PPC_REG, PPCSTATE_OFF(cr_val[field])); + ARM64Reg XC = gpr.CR(field); + ARM64Reg WC = DecodeReg(XC); switch (bit) { case CR_SO_BIT: // check bit 61 set @@ -557,7 +534,6 @@ void JitArm64::crXXX(UGeckoInstruction inst) default: _assert_msg_(DYNA_REC, false, "Invalid CR bit"); } - gpr.Unlock(WC); } // Compute combined bit @@ -585,7 +561,10 @@ void JitArm64::crXXX(UGeckoInstruction inst) int field = inst.CRBD >> 2; int bit = 3 - (inst.CRBD & 3); - LDR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val[field])); + gpr.Unlock(WB); + WB = INVALID_REG; + gpr.BindCRToRegister(field, true); + XB = gpr.CR(field); // Gross but necessary; if the input is totally zero and we set SO or LT, // or even just add the (1<<32), GT will suddenly end up set without us @@ -623,10 +602,8 @@ void JitArm64::crXXX(UGeckoInstruction inst) } ORR(XB, XB, 32, 0, true); // XB | 1<<32 - STR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val[field])); gpr.Unlock(WA); - gpr.Unlock(WB); } void JitArm64::mfcr(UGeckoInstruction inst) @@ -634,14 +611,44 @@ void JitArm64::mfcr(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); - gpr.Lock(W0, W1, W2, W30); - BL(GetAsmRoutines()->mfcr); - gpr.Unlock(W1, W2, W30); - gpr.BindToRegister(inst.RD, false); - MOV(gpr.R(inst.RD), W0); + ARM64Reg WA = gpr.R(inst.RD); + ARM64Reg WC = gpr.GetReg(); + ARM64Reg XA = EncodeRegTo64(WA); + ARM64Reg XC = EncodeRegTo64(WC); - gpr.Unlock(W0); + for (int i = 0; i < 8; i++) + { + ARM64Reg CR = gpr.CR(i); + ARM64Reg WCR = DecodeReg(CR); + + // SO + if (i == 0) + { + UBFX(XA, CR, 61, 1); + } + else + { + UBFX(XC, CR, 61, 1); + ORR(XA, XC, XA, ArithOption(XA, ST_LSL, 4)); + } + + // EQ + ORR(WC, WA, 32 - 1, 0); // WA | 1<<1 + CMP(WCR, WZR); + CSEL(WA, WC, WA, CC_EQ); + + // GT + ORR(WC, WA, 32 - 2, 0); // WA | 1<<2 + CMP(CR, ZR); + CSEL(WA, WC, WA, CC_GT); + + // LT + UBFX(XC, CR, 62, 1); + ORR(WA, WA, WC, ArithOption(WC, ST_LSL, 3)); + } + + gpr.Unlock(WC); } void JitArm64::mtcrf(UGeckoInstruction inst) @@ -653,8 +660,6 @@ void JitArm64::mtcrf(UGeckoInstruction inst) if (crm != 0) { ARM64Reg RS = gpr.R(inst.RS); - ARM64Reg WA = gpr.GetReg(); - ARM64Reg XA = EncodeRegTo64(WA); ARM64Reg WB = gpr.GetReg(); ARM64Reg XB = EncodeRegTo64(WB); MOVP2R(XB, m_crTable.data()); @@ -662,20 +667,23 @@ void JitArm64::mtcrf(UGeckoInstruction inst) { if ((crm & (0x80 >> i)) != 0) { + gpr.BindCRToRegister(i, false); + ARM64Reg CR = gpr.CR(i); + ARM64Reg WCR = DecodeReg(CR); + if (i != 7) - LSR(WA, RS, 28 - i * 4); + LSR(WCR, RS, 28 - i * 4); if (i != 0) { if (i != 7) - UBFX(WA, WA, 0, 4); + UBFX(WCR, WCR, 0, 4); else - UBFX(WA, RS, 0, 4); + UBFX(WCR, RS, 0, 4); } - LDR(XA, XB, ArithOption(XA, true)); - STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[i])); + LDR(CR, XB, ArithOption(CR, true)); } } - gpr.Unlock(WA, WB); + gpr.Unlock(WB); } } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index dfc37bef1b..603c36d38c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -651,47 +651,5 @@ void JitArm64::GenerateCommonAsm() pairedStoreQuantized[30] = storeSingleS8Slow; pairedStoreQuantized[31] = storeSingleS16Slow; - GetAsmRoutines()->mfcr = AlignCode16(); - GenMfcr(); -} - -void JitArm64::GenMfcr() -{ - // Input: Nothing - // Returns: W0 - // Clobbers: X1, X2 - const u8* start = GetCodePtr(); - for (int i = 0; i < 8; i++) - { - LDR(INDEX_UNSIGNED, X1, PPC_REG, PPCSTATE_OFF(cr_val[i])); - - // SO - if (i == 0) - { - UBFX(X0, X1, 61, 1); - } - else - { - ORR(W0, WZR, W0, ArithOption(W0, ST_LSL, 4)); - UBFX(X2, X1, 61, 1); - ORR(X0, X0, X2); - } - - // EQ - ORR(W2, W0, 32 - 1, 0); // W0 | 1<<1 - CMP(W1, WZR); - CSEL(W0, W2, W0, CC_EQ); - - // GT - ORR(W2, W0, 32 - 2, 0); // W0 | 1<<2 - CMP(X1, ZR); - CSEL(W0, W2, W0, CC_GT); - - // LT - UBFX(X2, X1, 62, 1); - ORR(W0, W0, W2, ArithOption(W2, ST_LSL, 3)); - } - - RET(X30); - JitRegister::Register(start, GetCodePtr(), "JIT_Mfcr"); + GetAsmRoutines()->mfcr = nullptr; }