Merge pull request #9376 from merryhime/rlwimix2

Jit_Integer: Optimize rlwimix
This commit is contained in:
JMC47 2021-12-20 14:20:26 -05:00 committed by GitHub
commit 9ed368ead7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1966,18 +1966,28 @@ void Jit64::rlwimix(UGeckoInstruction inst)
int a = inst.RA; int a = inst.RA;
int s = inst.RS; int s = inst.RS;
const u32 mask = MakeRotationMask(inst.MB, inst.ME);
if (gpr.IsImm(a, s)) if (gpr.IsImm(a, s))
{ {
const u32 mask = MakeRotationMask(inst.MB, inst.ME);
gpr.SetImmediate32(a, gpr.SetImmediate32(a,
(gpr.Imm32(a) & ~mask) | (Common::RotateLeft(gpr.Imm32(s), inst.SH) & mask)); (gpr.Imm32(a) & ~mask) | (Common::RotateLeft(gpr.Imm32(s), inst.SH) & mask));
if (inst.Rc) if (inst.Rc)
ComputeRC(a); ComputeRC(a);
} }
else if (gpr.IsImm(s) && mask == 0xFFFFFFFF)
{
gpr.SetImmediate32(a, Common::RotateLeft(gpr.Imm32(s), inst.SH));
if (inst.Rc)
ComputeRC(a);
}
else else
{ {
const u32 mask = MakeRotationMask(inst.MB, inst.ME); const bool left_shift = mask == 0U - (1U << inst.SH);
const bool right_shift = mask == (1U << inst.SH) - 1;
bool needs_test = false; bool needs_test = false;
if (mask == 0 || (a == s && inst.SH == 0)) if (mask == 0 || (a == s && inst.SH == 0))
{ {
needs_test = true; needs_test = true;
@ -1997,63 +2007,73 @@ void Jit64::rlwimix(UGeckoInstruction inst)
AndWithMask(Ra, ~mask); AndWithMask(Ra, ~mask);
OR(32, Ra, Imm32(Common::RotateLeft(gpr.Imm32(s), inst.SH) & mask)); OR(32, Ra, Imm32(Common::RotateLeft(gpr.Imm32(s), inst.SH) & mask));
} }
else if (inst.SH) else if (gpr.IsImm(a))
{ {
bool isLeftShift = mask == 0U - (1U << inst.SH); const u32 maskA = gpr.Imm32(a) & ~mask;
bool isRightShift = mask == (1U << inst.SH) - 1;
if (gpr.IsImm(a)) RCOpArg Rs = gpr.Use(s, RCMode::Read);
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
RegCache::Realize(Rs, Ra);
if (inst.SH == 0)
{ {
u32 maskA = gpr.Imm32(a) & ~mask; MOV(32, Ra, Rs);
AndWithMask(Ra, mask);
RCOpArg Rs = gpr.Use(s, RCMode::Read); }
RCX64Reg Ra = gpr.Bind(a, RCMode::Write); else if (left_shift)
RegCache::Realize(Rs, Ra); {
MOV(32, Ra, Rs);
if (isLeftShift) SHL(32, Ra, Imm8(inst.SH));
{ }
MOV(32, Ra, Rs); else if (right_shift)
SHL(32, Ra, Imm8(inst.SH)); {
} MOV(32, Ra, Rs);
else if (isRightShift) SHR(32, Ra, Imm8(32 - inst.SH));
{
MOV(32, Ra, Rs);
SHR(32, Ra, Imm8(32 - inst.SH));
}
else
{
RotateLeft(32, Ra, Rs, inst.SH);
AndWithMask(Ra, mask);
}
OR(32, Ra, Imm32(maskA));
} }
else else
{ {
// TODO: common cases of this might be faster with pinsrb or abuse of AH RotateLeft(32, Ra, Rs, inst.SH);
RCOpArg Rs = gpr.Use(s, RCMode::Read); AndWithMask(Ra, mask);
RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); }
RegCache::Realize(Rs, Ra);
if (isLeftShift) if (maskA)
{ OR(32, Ra, Imm32(maskA));
MOV(32, R(RSCRATCH), Rs); else
SHL(32, R(RSCRATCH), Imm8(inst.SH)); needs_test = true;
AndWithMask(Ra, ~mask); }
OR(32, Ra, R(RSCRATCH)); else if (inst.SH)
} {
else if (isRightShift) // TODO: perhaps consider pinsrb or abuse of AH
{ RCOpArg Rs = gpr.Use(s, RCMode::Read);
MOV(32, R(RSCRATCH), Rs); RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite);
SHR(32, R(RSCRATCH), Imm8(32 - inst.SH)); RegCache::Realize(Rs, Ra);
AndWithMask(Ra, ~mask);
OR(32, Ra, R(RSCRATCH)); if (left_shift)
} {
else MOV(32, R(RSCRATCH), Rs);
{ SHL(32, R(RSCRATCH), Imm8(inst.SH));
RotateLeft(32, RSCRATCH, Rs, inst.SH); }
XOR(32, R(RSCRATCH), Ra); else if (right_shift)
{
MOV(32, R(RSCRATCH), Rs);
SHR(32, R(RSCRATCH), Imm8(32 - inst.SH));
}
else
{
RotateLeft(32, RSCRATCH, Rs, inst.SH);
}
if (mask == 0xFF || mask == 0xFFFF)
{
MOV(mask == 0xFF ? 8 : 16, Ra, R(RSCRATCH));
needs_test = true;
}
else
{
if (!left_shift && !right_shift)
AndWithMask(RSCRATCH, mask); AndWithMask(RSCRATCH, mask);
XOR(32, Ra, R(RSCRATCH)); AndWithMask(Ra, ~mask);
} OR(32, Ra, R(RSCRATCH));
} }
} }
else else
@ -2061,9 +2081,18 @@ void Jit64::rlwimix(UGeckoInstruction inst)
RCX64Reg Rs = gpr.Bind(s, RCMode::Read); RCX64Reg Rs = gpr.Bind(s, RCMode::Read);
RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite);
RegCache::Realize(Rs, Ra); RegCache::Realize(Rs, Ra);
XOR(32, Ra, Rs);
AndWithMask(Ra, ~mask); if (mask == 0xFF || mask == 0xFFFF)
XOR(32, Ra, Rs); {
MOV(mask == 0xFF ? 8 : 16, Ra, Rs);
needs_test = true;
}
else
{
XOR(32, Ra, Rs);
AndWithMask(Ra, ~mask);
XOR(32, Ra, Rs);
}
} }
if (inst.Rc) if (inst.Rc)
ComputeRC(a, needs_test); ComputeRC(a, needs_test);
@ -2088,11 +2117,7 @@ void Jit64::rlwnmx(UGeckoInstruction inst)
RCOpArg Rs = gpr.Use(s, RCMode::Read); RCOpArg Rs = gpr.Use(s, RCMode::Read);
RegCache::Realize(Ra, Rs); RegCache::Realize(Ra, Rs);
if (a != s) RotateLeft(32, Ra, Rs, amount);
MOV(32, Ra, Rs);
if (amount)
ROL(32, Ra, Imm8(amount));
// we need flags if we're merging the branch // we need flags if we're merging the branch
if (inst.Rc && CheckMergedBranch(0)) if (inst.Rc && CheckMergedBranch(0))