From 0dd3804cf77c5edef79aa12e4e82770814f07a7d Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Wed, 7 Jan 2015 14:57:55 -0600
Subject: [PATCH] [AArch64] Implement 13 integer instructions.

---
 Source/Core/Core/PowerPC/JitArm64/Jit.h       |  11 +
 .../PowerPC/JitArm64/JitArm64_Integer.cpp     | 332 ++++++++++++++++++
 .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp |  28 +-
 3 files changed, 357 insertions(+), 14 deletions(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index 45b2f9bf91..d3e51ee168 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -82,6 +82,7 @@ public:
 	// Integer
 	void arith_imm(UGeckoInstruction inst);
 	void boolX(UGeckoInstruction inst);
+	void addx(UGeckoInstruction inst);
 	void extsXx(UGeckoInstruction inst);
 	void cntlzwx(UGeckoInstruction inst);
 	void negx(UGeckoInstruction inst);
@@ -89,6 +90,14 @@ public:
 	void cmpl(UGeckoInstruction inst);
 	void cmpi(UGeckoInstruction inst);
 	void cmpli(UGeckoInstruction inst);
+	void rlwinmx(UGeckoInstruction inst);
+	void srawix(UGeckoInstruction inst);
+	void mullwx(UGeckoInstruction inst);
+	void addic(UGeckoInstruction inst);
+	void mulli(UGeckoInstruction inst);
+	void addzex(UGeckoInstruction inst);
+	void subfx(UGeckoInstruction inst);
+	void addcx(UGeckoInstruction inst);
 
 	// System Registers
 	void mtmsr(UGeckoInstruction inst);
@@ -144,6 +153,8 @@ private:
 
 	void ComputeRC(Arm64Gen::ARM64Reg reg, int crf = 0);
 	void ComputeRC(u32 imm, int crf = 0);
+	void ComputeCarry(bool Carry);
+	void ComputeCarry();
 
 	typedef u32 (*Operation)(u32, u32);
 	void reg_imm(u32 d, u32 a, bool binary, u32 value, Operation do_op, void (ARM64XEmitter::*op)(Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, ArithOption), bool Rc = false);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
index a1a7ffa005..14da56ec7c 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
@@ -39,6 +39,32 @@ void JitArm64::ComputeRC(u32 imm, int crf)
 	gpr.Unlock(WA);
 }
 
+// Stores a carry value that is known at compile time into XER[CA].
+void JitArm64::ComputeCarry(bool Carry)
+{
+	if (Carry)
+	{
+		ARM64Reg WA = gpr.GetReg();
+		MOVI2R(WA, 1);
+		STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
+		gpr.Unlock(WA);
+		return;
+	}
+
+	// WSP encodes register 31, which this store reads as zero.
+	STRB(INDEX_UNSIGNED, WSP, X29, PPCSTATE_OFF(xer_ca));
+}
+
+// Stores the host carry flag left by the previously emitted flag-setting
+// instruction into XER[CA]: 0 when the carry is clear, 1 when it is set.
+void JitArm64::ComputeCarry()
+{
+	ARM64Reg WA = gpr.GetReg();
+	CSINC(WA, WSP, WSP, CC_CC);
+	STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
+	gpr.Unlock(WA);
+}
+
 // Following static functions are used in conjunction with reg_imm
 static u32 Add(u32 a, u32 b)
 {
@@ -245,6 +271,34 @@ void JitArm64::boolX(UGeckoInstruction inst)
 	}
 }
 
+// addx: rD = rA + rB. The overflow-enabled form falls back to the
+// interpreter.
+void JitArm64::addx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITIntegerOff);
+	FALLBACK_IF(inst.OE);
+
+	int a = inst.RA, b = inst.RB, d = inst.RD;
+
+	if (gpr.IsImm(a) && gpr.IsImm(b))
+	{
+		u32 i = gpr.GetImm(a), j = gpr.GetImm(b);
+		gpr.SetImmediate(d, i + j);
+		if (inst.Rc)
+			ComputeRC(gpr.GetImm(d), 0);
+	}
+	else
+	{
+		// Bind the destination before emitting, matching mullwx and addcx;
+		// the second argument is true only when rD aliases a source operand.
+		gpr.BindToRegister(d, d == a || d == b);
+		ADD(gpr.R(d), gpr.R(a), gpr.R(b));
+		if (inst.Rc)
+			ComputeRC(gpr.R(d), 0);
+	}
+}
+
 void JitArm64::extsXx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
@@ -415,3 +469,281 @@ void JitArm64::cmpli(UGeckoInstruction inst)
 	FALLBACK_IF(true);
 }
 
+// rlwinmx: rA = ROTL32(rS, SH) & MASK(MB, ME).
+void JitArm64::rlwinmx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITIntegerOff);
+
+	u32 mask = Helper_Mask(inst.MB,inst.ME);
+	if (gpr.IsImm(inst.RS))
+	{
+		gpr.SetImmediate(inst.RA, _rotl(gpr.GetImm(inst.RS), inst.SH) & mask);
+		if (inst.Rc)
+			ComputeRC(gpr.GetImm(inst.RA), 0);
+		return;
+	}
+
+	gpr.BindToRegister(inst.RA, inst.RA == inst.RS);
+
+	ARM64Reg WA = gpr.GetReg();
+	// A left rotate by SH is a right rotate by (32 - SH). Reduce the amount
+	// modulo 32 so that SH == 0 yields a rotate by 0 rather than by 32, which
+	// is outside the 0-31 range a 32-bit shifted-register operand can encode.
+	ArithOption Shift(gpr.R(inst.RS), ST_ROR, (32 - inst.SH) & 31);
+	MOVI2R(WA, mask);
+	AND(gpr.R(inst.RA), WA, gpr.R(inst.RS), Shift);
+	gpr.Unlock(WA);
+
+	if (inst.Rc)
+		ComputeRC(gpr.R(inst.RA), 0);
+}
+
+// srawix: rA = rS >> SH (arithmetic). XER[CA] is set when rS is negative
+// and at least one 1 bit is shifted out.
+void JitArm64::srawix(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITIntegerOff);
+
+	int a = inst.RA;
+	int s = inst.RS;
+	int amount = inst.SH;
+
+	if (gpr.IsImm(s))
+	{
+		s32 imm = (s32)gpr.GetImm(s);
+		gpr.SetImmediate(a, imm >> amount);
+
+		// Test the shifted-out bits on the unsigned value; left-shifting a
+		// negative signed integer is not well defined.
+		if (amount != 0 && (imm < 0) && ((u32)imm << (32 - amount)))
+			ComputeCarry(true);
+		else
+			ComputeCarry(false);
+
+		if (inst.Rc)
+			ComputeRC(gpr.GetImm(a), 0);
+	}
+	else if (amount != 0)
+	{
+		gpr.BindToRegister(a, a == s);
+		ARM64Reg RA = gpr.R(a);
+		ARM64Reg RS = gpr.R(s);
+		ARM64Reg WA = gpr.GetReg();
+
+		// WA holds the shifted-out bits in its top bits; after the shift the
+		// top bits of RA are copies of the sign, so WA & RA is non-zero
+		// exactly when the carry has to be set.
+		ORR(WA, WSP, RS, ArithOption(RS, ST_LSL, 32 - amount));
+		ORR(RA, WSP, RS, ArithOption(RS, ST_ASR, amount));
+		if (inst.Rc)
+			ComputeRC(RA, 0);
+
+		ANDS(WSP, WA, RA, ArithOption(RA, ST_LSL, 0));
+		CSINC(WA, WSP, WSP, CC_EQ);
+		STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
+		gpr.Unlock(WA);
+	}
+	else
+	{
+		gpr.BindToRegister(a, a == s);
+		ARM64Reg RA = gpr.R(a);
+		ARM64Reg RS = gpr.R(s);
+		MOV(RA, RS);
+		STRB(INDEX_UNSIGNED, WSP, X29, PPCSTATE_OFF(xer_ca));
+
+		if (inst.Rc)
+			ComputeRC(RA, 0);
+	}
+}
+
+// addic / addic.: rD = rA + SIMM, XER[CA] = carry out. Opcode 13 is the
+// record form that also updates CR0.
+void JitArm64::addic(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITIntegerOff);
+
+	int a = inst.RA, d = inst.RD;
+	bool rc = inst.OPCD == 13;
+	s32 simm = inst.SIMM_16;
+	u32 imm = (u32)simm;
+
+	if (gpr.IsImm(a))
+	{
+		u32 i = gpr.GetImm(a);
+		gpr.SetImmediate(d, i + imm);
+
+		bool has_carry = Interpreter::Helper_Carry(i, imm);
+		ComputeCarry(has_carry);
+		if (rc)
+			ComputeRC(gpr.GetImm(d), 0);
+	}
+	else
+	{
+		gpr.BindToRegister(d, d == a);
+		if (imm < 4096)
+		{
+			ADDS(gpr.R(d), gpr.R(a), imm);
+		}
+		else if (simm > -4096 && simm < 0)
+		{
+			// Subtracting |simm| produces the same result and host carry flag
+			// as adding the sign-extended immediate.
+			SUBS(gpr.R(d), gpr.R(a), std::abs(simm));
+		}
+		else
+		{
+			ARM64Reg WA = gpr.GetReg();
+			MOVI2R(WA, imm);
+			ADDS(gpr.R(d), gpr.R(a), WA);
+			gpr.Unlock(WA);
+		}
+
+		ComputeCarry();
+		if (rc)
+			ComputeRC(gpr.R(d), 0);
+	}
+}
+
+// mulli: rD = low 32 bits of rA * SIMM.
+void JitArm64::mulli(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITIntegerOff);
+	// mulli is a D-form instruction without an OE bit; the bits inst.OE
+	// reads belong to the immediate, so testing OE here would only cause
+	// needless interpreter fallbacks.
+
+	int a = inst.RA, d = inst.RD;
+
+	if (gpr.IsImm(a))
+	{
+		// Multiply as unsigned so that a wrapping product is well defined;
+		// the low 32 bits are the same as for the signed product.
+		u32 i = gpr.GetImm(a);
+		gpr.SetImmediate(d, i * (u32)(s32)inst.SIMM_16);
+	}
+	else
+	{
+		gpr.BindToRegister(d, d == a);
+		ARM64Reg WA = gpr.GetReg();
+		MOVI2R(WA, (u32)(s32)inst.SIMM_16);
+		MUL(gpr.R(d), gpr.R(a), WA);
+		gpr.Unlock(WA);
+	}
+}
+
+// mullwx: rD = low 32 bits of rA * rB. The overflow-enabled form falls
+// back to the interpreter.
+void JitArm64::mullwx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITIntegerOff);
+	FALLBACK_IF(inst.OE);
+
+	int a = inst.RA, b = inst.RB, d = inst.RD;
+
+	if (gpr.IsImm(a) && gpr.IsImm(b))
+	{
+		// Unsigned multiplication wraps instead of overflowing and has the
+		// same low 32 bits as the signed product.
+		u32 i = gpr.GetImm(a), j = gpr.GetImm(b);
+		gpr.SetImmediate(d, i * j);
+		if (inst.Rc)
+			ComputeRC(gpr.GetImm(d), 0);
+	}
+	else
+	{
+		gpr.BindToRegister(d, d == a || d == b);
+		MUL(gpr.R(d), gpr.R(a), gpr.R(b));
+		if (inst.Rc)
+			ComputeRC(gpr.R(d), 0);
+	}
+}
+
+// addzex: rD = rA + XER[CA], XER[CA] = carry out. The overflow-enabled
+// form falls back to the interpreter.
+void JitArm64::addzex(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITIntegerOff);
+	FALLBACK_IF(inst.OE);
+
+	int a = inst.RA, d = inst.RD;
+
+	gpr.BindToRegister(d, d == a);
+	ARM64Reg WA = gpr.GetReg();
+	LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
+	// Add the old carry with a flag-setting add so the host carry flag is
+	// the carry out of rA + CA (assumes xer_ca holds 0 or 1, as
+	// ComputeCarry writes it). A compare of the result against zero would
+	// leave the host carry flag set unconditionally.
+	ADDS(gpr.R(d), gpr.R(a), WA);
+	gpr.Unlock(WA);
+	ComputeCarry();
+	if (inst.Rc)
+		ComputeRC(gpr.R(d), 0);
+}
+
+// subfx: rD = rB - rA. The overflow-enabled form falls back to the
+// interpreter.
+void JitArm64::subfx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITIntegerOff);
+	FALLBACK_IF(inst.OE);
+
+	int a = inst.RA, b = inst.RB, d = inst.RD;
+
+	if (gpr.IsImm(a) && gpr.IsImm(b))
+	{
+		u32 i = gpr.GetImm(a), j = gpr.GetImm(b);
+		gpr.SetImmediate(d, j - i);
+		if (inst.Rc)
+			ComputeRC(gpr.GetImm(d), 0);
+	}
+	else
+	{
+		// Bind the destination before emitting, matching mullwx and addcx;
+		// the second argument is true only when rD aliases a source operand.
+		gpr.BindToRegister(d, d == a || d == b);
+		SUB(gpr.R(d), gpr.R(b), gpr.R(a));
+		if (inst.Rc)
+			ComputeRC(gpr.R(d), 0);
+	}
+}
+
+// addcx: rD = rA + rB, XER[CA] = carry out. The overflow-enabled form
+// falls back to the interpreter.
+void JitArm64::addcx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITIntegerOff);
+	FALLBACK_IF(inst.OE);
+
+	int a = inst.RA, b = inst.RB, d = inst.RD;
+
+	if (gpr.IsImm(a) && gpr.IsImm(b))
+	{
+		u32 i = gpr.GetImm(a), j = gpr.GetImm(b);
+		// The folded constant is the sum of the operands (not the product),
+		// matching the ADDS emitted for the non-immediate case.
+		gpr.SetImmediate(d, i + j);
+
+		bool has_carry = Interpreter::Helper_Carry(i, j);
+		ComputeCarry(has_carry);
+		if (inst.Rc)
+			ComputeRC(gpr.GetImm(d), 0);
+	}
+	else
+	{
+		gpr.BindToRegister(d, d == a || d == b);
+		ADDS(gpr.R(d), gpr.R(a), gpr.R(b));
+
+		ComputeCarry();
+		if (inst.Rc)
+			ComputeRC(gpr.R(d), 0);
+	}
+}
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
index 13e02bfa4a..fae0a3bdc9 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
@@ -45,17 +45,17 @@ static GekkoOPTemplate primarytable[] =
 	{3, &JitArm64::twx}, //"twi", OPTYPE_SYSTEM, FL_ENDBLOCK}},
 	{17, &JitArm64::sc}, //"sc", OPTYPE_SYSTEM, FL_ENDBLOCK, 1}},
 
-	{7, &JitArm64::FallBackToInterpreter}, //"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}},
+	{7, &JitArm64::mulli}, //"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}},
 	{8, &JitArm64::FallBackToInterpreter}, //"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
 	{10, &JitArm64::cmpli}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
 	{11, &JitArm64::cmpi}, //"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
-	{12, &JitArm64::FallBackToInterpreter}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
-	{13, &JitArm64::FallBackToInterpreter}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}},
+	{12, &JitArm64::addic}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
+	{13, &JitArm64::addic}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}},
 	{14, &JitArm64::arith_imm}, //"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}},
 	{15, &JitArm64::arith_imm}, //"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}},
 
 	{20, &JitArm64::FallBackToInterpreter}, //"rlwimix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_A | FL_IN_S | FL_RC_BIT}},
-	{21, &JitArm64::FallBackToInterpreter}, //"rlwinmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
+	{21, &JitArm64::rlwinmx}, //"rlwinmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
 	{23, &JitArm64::FallBackToInterpreter}, //"rlwnmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_IN_B | FL_RC_BIT}},
 
 	{24, &JitArm64::arith_imm}, //"ori", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}},
@@ -196,7 +196,7 @@ static GekkoOPTemplate table31[] =
 	{954, &JitArm64::extsXx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
 	{536, &JitArm64::FallBackToInterpreter}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
 	{792, &JitArm64::FallBackToInterpreter}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
-	{824, &JitArm64::FallBackToInterpreter}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
+	{824, &JitArm64::srawix}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
 	{24, &JitArm64::FallBackToInterpreter}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
 
 	{54, &JitArm64::FallBackToInterpreter}, //"dcbst", OPTYPE_DCACHE, 0, 4}},
@@ -294,25 +294,25 @@ static GekkoOPTemplate table31[] =
 
 static GekkoOPTemplate table31_2[] =
 {
-	{266, &JitArm64::FallBackToInterpreter}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
-	{778, &JitArm64::FallBackToInterpreter}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
-	{10, &JitArm64::FallBackToInterpreter}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
-	{522, &JitArm64::FallBackToInterpreter}, //"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
+	{266, &JitArm64::addx}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
+	{778, &JitArm64::addx}, //"addox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
+	{10, &JitArm64::addcx}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
+	{522, &JitArm64::addcx}, //"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
 	{138, &JitArm64::FallBackToInterpreter}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
 	{650, &JitArm64::FallBackToInterpreter}, //"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
 	{234, &JitArm64::FallBackToInterpreter}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
-	{202, &JitArm64::FallBackToInterpreter}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
+	{202, &JitArm64::addzex}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
 	{491, &JitArm64::FallBackToInterpreter}, //"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
 	{1003, &JitArm64::FallBackToInterpreter}, //"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
 	{459, &JitArm64::FallBackToInterpreter}, //"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
 	{971, &JitArm64::FallBackToInterpreter}, //"divwuox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
 	{75, &JitArm64::FallBackToInterpreter}, //"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
 	{11, &JitArm64::FallBackToInterpreter}, //"mulhwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
-	{235, &JitArm64::FallBackToInterpreter}, //"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
-	{747, &JitArm64::FallBackToInterpreter}, //"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
+	{235, &JitArm64::mullwx}, //"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
+	{747, &JitArm64::mullwx}, //"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
 	{104, &JitArm64::negx}, //"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
-	{40, &JitArm64::FallBackToInterpreter}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
-	{552, &JitArm64::FallBackToInterpreter}, //"subox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
+	{40, &JitArm64::subfx}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
+	{552, &JitArm64::subfx}, //"subfox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
 	{8, &JitArm64::FallBackToInterpreter}, //"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
 	{520, &JitArm64::FallBackToInterpreter}, //"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
 	{136, &JitArm64::FallBackToInterpreter}, //"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},