Merge pull request #4210 from degasus/arm

JitArm64: Small cleanup + speedups.
This commit is contained in:
Markus Wick 2016-09-27 18:45:14 +02:00 committed by GitHub
commit 3696c2b022
5 changed files with 62 additions and 79 deletions

View File

@ -3,6 +3,7 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <algorithm> #include <algorithm>
#include <array>
#include <cstring> #include <cstring>
#include <vector> #include <vector>
@ -200,10 +201,10 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned
// To repeat a value every d bits, we multiply it by a number of the form // To repeat a value every d bits, we multiply it by a number of the form
// (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can // (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can
// be derived using a table lookup on CLZ(d). // be derived using a table lookup on CLZ(d).
static const std::array<uint64_t, 6> multipliers = { static const std::array<uint64_t, 6> multipliers = {{
0x0000000000000001UL, 0x0000000100000001UL, 0x0001000100010001UL, 0x0000000000000001UL, 0x0000000100000001UL, 0x0001000100010001UL, 0x0101010101010101UL,
0x0101010101010101UL, 0x1111111111111111UL, 0x5555555555555555UL, 0x1111111111111111UL, 0x5555555555555555UL,
}; }};
int multiplier_idx = CountLeadingZeros(d, kXRegSizeInBits) - 57; int multiplier_idx = CountLeadingZeros(d, kXRegSizeInBits) - 57;

View File

@ -120,6 +120,26 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
SetJumpTarget(c); SetJumpTarget(c);
} }
} }
if (jo.memcheck && (js.op->opinfo->flags & FL_LOADSTORE))
{
ARM64Reg WA = gpr.GetReg();
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
FixupBranch noException = TBZ(WA, IntLog2(EXCEPTION_DSI));
FixupBranch handleException = B();
SwitchToFarCode();
SetJumpTarget(handleException);
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
WriteExceptionExit(js.compilerPC);
SwitchToNearCode();
SetJumpTarget(noException);
gpr.Unlock(WA);
}
} }
void JitArm64::HLEFunction(UGeckoInstruction inst) void JitArm64::HLEFunction(UGeckoInstruction inst)
@ -598,26 +618,6 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
// If we have a register that will never be used again, flush it. // If we have a register that will never be used again, flush it.
gpr.StoreRegisters(~ops[i].gprInUse); gpr.StoreRegisters(~ops[i].gprInUse);
fpr.StoreRegisters(~ops[i].fprInUse); fpr.StoreRegisters(~ops[i].fprInUse);
if (jo.memcheck && (opinfo->flags & FL_LOADSTORE))
{
ARM64Reg WA = gpr.GetReg();
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
FixupBranch noException = TBZ(WA, IntLog2(EXCEPTION_DSI));
FixupBranch handleException = B();
SwitchToFarCode();
SetJumpTarget(handleException);
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
WriteExceptionExit(js.compilerPC);
SwitchToNearCode();
SetJumpTarget(noException);
gpr.Unlock(WA);
}
} }
i += js.skipInstructions; i += js.skipInstructions;

View File

@ -238,9 +238,6 @@ private:
void ComputeCarry(bool Carry); void ComputeCarry(bool Carry);
void ComputeCarry(); void ComputeCarry();
typedef u32 (*Operation)(u32, u32); void reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32),
void reg_imm(u32 d, u32 a, u32 value, Operation do_op, void (ARM64XEmitter::*op)(ARM64Reg, ARM64Reg, u64, ARM64Reg), bool Rc = false);
void (ARM64XEmitter::*op)(Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg,
ArithOption),
bool Rc = false);
}; };

View File

@ -75,25 +75,8 @@ void JitArm64::ComputeCarry()
gpr.Unlock(WA); gpr.Unlock(WA);
} }
// Following static functions are used in conjunction with reg_imm void JitArm64::reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32),
static u32 Or(u32 a, u32 b) void (ARM64XEmitter::*op)(ARM64Reg, ARM64Reg, u64, ARM64Reg), bool Rc)
{
return a | b;
}
static u32 And(u32 a, u32 b)
{
return a & b;
}
static u32 Xor(u32 a, u32 b)
{
return a ^ b;
}
void JitArm64::reg_imm(u32 d, u32 a, u32 value, Operation do_op,
void (ARM64XEmitter::*op)(ARM64Reg, ARM64Reg, ARM64Reg, ArithOption),
bool Rc)
{ {
if (gpr.IsImm(a)) if (gpr.IsImm(a))
{ {
@ -105,8 +88,7 @@ void JitArm64::reg_imm(u32 d, u32 a, u32 value, Operation do_op,
{ {
gpr.BindToRegister(d, d == a); gpr.BindToRegister(d, d == a);
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
MOVI2R(WA, value); (this->*op)(gpr.R(d), gpr.R(a), value, WA);
(this->*op)(gpr.R(d), gpr.R(a), WA, ArithOption(WA, ST_LSL, 0));
gpr.Unlock(WA); gpr.Unlock(WA);
if (Rc) if (Rc)
@ -128,22 +110,23 @@ void JitArm64::arith_imm(UGeckoInstruction inst)
// NOP // NOP
return; return;
} }
reg_imm(a, s, inst.UIMM, Or, &ARM64XEmitter::ORR); reg_imm(a, s, inst.UIMM, [](u32 a, u32 b) { return a | b; }, &ARM64XEmitter::ORRI2R);
break; break;
case 25: // oris case 25: // oris
reg_imm(a, s, inst.UIMM << 16, Or, &ARM64XEmitter::ORR); reg_imm(a, s, inst.UIMM << 16, [](u32 a, u32 b) { return a | b; }, &ARM64XEmitter::ORRI2R);
break; break;
case 28: // andi case 28: // andi
reg_imm(a, s, inst.UIMM, And, &ARM64XEmitter::AND, true); reg_imm(a, s, inst.UIMM, [](u32 a, u32 b) { return a & b; }, &ARM64XEmitter::ANDI2R, true);
break; break;
case 29: // andis case 29: // andis
reg_imm(a, s, inst.UIMM << 16, And, &ARM64XEmitter::AND, true); reg_imm(a, s, inst.UIMM << 16, [](u32 a, u32 b) { return a & b; }, &ARM64XEmitter::ANDI2R,
true);
break; break;
case 26: // xori case 26: // xori
reg_imm(a, s, inst.UIMM, Xor, &ARM64XEmitter::EOR); reg_imm(a, s, inst.UIMM, [](u32 a, u32 b) { return a ^ b; }, &ARM64XEmitter::EORI2R);
break; break;
case 27: // xoris case 27: // xoris
reg_imm(a, s, inst.UIMM << 16, Xor, &ARM64XEmitter::EOR); reg_imm(a, s, inst.UIMM << 16, [](u32 a, u32 b) { return a ^ b; }, &ARM64XEmitter::EORI2R);
break; break;
} }
} }
@ -272,37 +255,37 @@ void JitArm64::boolX(UGeckoInstruction inst)
gpr.BindToRegister(a, (a == s) || (a == b)); gpr.BindToRegister(a, (a == s) || (a == b));
if (inst.SUBOP10 == 28) // andx if (inst.SUBOP10 == 28) // andx
{ {
AND(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0)); AND(gpr.R(a), gpr.R(s), gpr.R(b));
} }
else if (inst.SUBOP10 == 476) // nandx else if (inst.SUBOP10 == 476) // nandx
{ {
AND(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0)); AND(gpr.R(a), gpr.R(s), gpr.R(b));
MVN(gpr.R(a), gpr.R(a)); MVN(gpr.R(a), gpr.R(a));
} }
else if (inst.SUBOP10 == 60) // andcx else if (inst.SUBOP10 == 60) // andcx
{ {
BIC(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0)); BIC(gpr.R(a), gpr.R(s), gpr.R(b));
} }
else if (inst.SUBOP10 == 444) // orx else if (inst.SUBOP10 == 444) // orx
{ {
ORR(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0)); ORR(gpr.R(a), gpr.R(s), gpr.R(b));
} }
else if (inst.SUBOP10 == 124) // norx else if (inst.SUBOP10 == 124) // norx
{ {
ORR(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0)); ORR(gpr.R(a), gpr.R(s), gpr.R(b));
MVN(gpr.R(a), gpr.R(a)); MVN(gpr.R(a), gpr.R(a));
} }
else if (inst.SUBOP10 == 412) // orcx else if (inst.SUBOP10 == 412) // orcx
{ {
ORN(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0)); ORN(gpr.R(a), gpr.R(s), gpr.R(b));
} }
else if (inst.SUBOP10 == 316) // xorx else if (inst.SUBOP10 == 316) // xorx
{ {
EOR(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0)); EOR(gpr.R(a), gpr.R(s), gpr.R(b));
} }
else if (inst.SUBOP10 == 284) // eqvx else if (inst.SUBOP10 == 284) // eqvx
{ {
EON(gpr.R(a), gpr.R(b), gpr.R(s), ArithOption(gpr.R(a), ST_LSL, 0)); EON(gpr.R(a), gpr.R(b), gpr.R(s));
} }
else else
{ {
@ -418,7 +401,7 @@ void JitArm64::negx(UGeckoInstruction inst)
else else
{ {
gpr.BindToRegister(d, d == a); gpr.BindToRegister(d, d == a);
SUB(gpr.R(d), WSP, gpr.R(a), ArithOption(gpr.R(a), ST_LSL, 0)); SUB(gpr.R(d), WSP, gpr.R(a));
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d), 0); ComputeRC(gpr.R(d), 0);
} }
@ -692,8 +675,11 @@ void JitArm64::addic(UGeckoInstruction inst)
else else
{ {
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
MOVI2R(WA, imm); MOVI2R(WA, std::abs(simm));
ADDS(gpr.R(d), gpr.R(a), WA); if (simm < 0)
SUBS(gpr.R(d), gpr.R(a), WA);
else
ADDS(gpr.R(d), gpr.R(a), WA);
gpr.Unlock(WA); gpr.Unlock(WA);
} }

View File

@ -9,19 +9,18 @@
using namespace Arm64Gen; using namespace Arm64Gen;
ARM64Reg src_reg = X0; constexpr ARM64Reg src_reg = X0;
ARM64Reg dst_reg = X1; constexpr ARM64Reg dst_reg = X1;
ARM64Reg count_reg = W2; constexpr ARM64Reg count_reg = W2;
ARM64Reg skipped_reg = W17; constexpr ARM64Reg skipped_reg = W17;
ARM64Reg scratch1_reg = W16; constexpr ARM64Reg scratch1_reg = W16;
ARM64Reg scratch2_reg = W15; constexpr ARM64Reg scratch2_reg = W15;
ARM64Reg scratch3_reg = W14; constexpr ARM64Reg scratch3_reg = W14;
ARM64Reg scratch4_reg = W13; constexpr ARM64Reg saved_count = W12;
ARM64Reg saved_count = W12;
ARM64Reg stride_reg = X11; constexpr ARM64Reg stride_reg = X11;
ARM64Reg arraybase_reg = X10; constexpr ARM64Reg arraybase_reg = X10;
ARM64Reg scale_reg = X9; constexpr ARM64Reg scale_reg = X9;
alignas(16) static const float scale_factors[] = { alignas(16) static const float scale_factors[] = {
1.0 / (1ULL << 0), 1.0 / (1ULL << 1), 1.0 / (1ULL << 2), 1.0 / (1ULL << 3), 1.0 / (1ULL << 0), 1.0 / (1ULL << 1), 1.0 / (1ULL << 2), 1.0 / (1ULL << 3),