From 4e0a6880767cce8d26a0c6d3bb5f15e4607a627c Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 28 Dec 2020 11:48:23 +0000 Subject: [PATCH 1/6] Jit_Integer: rlwnmx: Use RotateLeft --- Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index bd4b20a2d3..6063a5a704 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1771,11 +1771,7 @@ void Jit64::rlwnmx(UGeckoInstruction inst) RCOpArg Rs = gpr.Use(s, RCMode::Read); RegCache::Realize(Ra, Rs); - if (a != s) - MOV(32, Ra, Rs); - - if (amount) - ROL(32, Ra, Imm8(amount)); + RotateLeft(32, Ra, Rs, amount); // we need flags if we're merging the branch if (inst.Rc && CheckMergedBranch(0)) From 19aa10df756761f9770bd25c0b45955fa70370e5 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 28 Dec 2020 12:48:03 +0000 Subject: [PATCH 2/6] Jit_Integer: rlwimix: Fix bug in mask == 0xFFFFFFFF case --- Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 6063a5a704..eab1246a58 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1668,7 +1668,7 @@ void Jit64::rlwimix(UGeckoInstruction inst) else if (mask == 0xFFFFFFFF) { RCOpArg Rs = gpr.Use(s, RCMode::Read); - RCX64Reg Ra = gpr.Bind(a, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); RegCache::Realize(Rs, Ra); RotateLeft(32, Ra, Rs, inst.SH); needs_test = true; From 0ef1fcdeb9bbbc80245b53efeb2bc38aed1e4bcf Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 28 Dec 2020 12:49:48 +0000 Subject: [PATCH 3/6] Jit_Integer: rlwimix: Handle immediate mask == 0xFFFFFFFF case --- Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 11 
+++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index eab1246a58..791febaf0a 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1649,17 +1649,24 @@ void Jit64::rlwimix(UGeckoInstruction inst) int a = inst.RA; int s = inst.RS; + const u32 mask = MakeRotationMask(inst.MB, inst.ME); + if (gpr.IsImm(a, s)) { - const u32 mask = MakeRotationMask(inst.MB, inst.ME); gpr.SetImmediate32(a, (gpr.Imm32(a) & ~mask) | (Common::RotateLeft(gpr.Imm32(s), inst.SH) & mask)); if (inst.Rc) ComputeRC(a); } + else if (gpr.IsImm(s) && mask == 0xFFFFFFFF) + { + gpr.SetImmediate32(a, Common::RotateLeft(gpr.Imm32(s), inst.SH)); + + if (inst.Rc) + ComputeRC(a); + } else { - const u32 mask = MakeRotationMask(inst.MB, inst.ME); bool needs_test = false; if (mask == 0 || (a == s && inst.SH == 0)) { From c20bb81071e9fb5ae168817cf065895d1e3a318c Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 28 Dec 2020 12:53:24 +0000 Subject: [PATCH 4/6] Jit_Integer: rlwimix: Flatten logic --- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 100 +++++++++--------- 1 file changed, 49 insertions(+), 51 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 791febaf0a..ec62bf169d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1667,7 +1667,10 @@ void Jit64::rlwimix(UGeckoInstruction inst) } else { + const bool left_shift = mask == 0U - (1U << inst.SH); + const bool right_shift = mask == (1U << inst.SH) - 1; bool needs_test = false; + if (mask == 0 || (a == s && inst.SH == 0)) { needs_test = true; @@ -1687,63 +1690,58 @@ void Jit64::rlwimix(UGeckoInstruction inst) AndWithMask(Ra, ~mask); OR(32, Ra, Imm32(Common::RotateLeft(gpr.Imm32(s), inst.SH) & mask)); } - else if (inst.SH) + else if 
(inst.SH && gpr.IsImm(a)) { - bool isLeftShift = mask == 0U - (1U << inst.SH); - bool isRightShift = mask == (1U << inst.SH) - 1; - if (gpr.IsImm(a)) + u32 maskA = gpr.Imm32(a) & ~mask; + + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RegCache::Realize(Rs, Ra); + + if (left_shift) { - u32 maskA = gpr.Imm32(a) & ~mask; - - RCOpArg Rs = gpr.Use(s, RCMode::Read); - RCX64Reg Ra = gpr.Bind(a, RCMode::Write); - RegCache::Realize(Rs, Ra); - - if (isLeftShift) - { - MOV(32, Ra, Rs); - SHL(32, Ra, Imm8(inst.SH)); - } - else if (isRightShift) - { - MOV(32, Ra, Rs); - SHR(32, Ra, Imm8(32 - inst.SH)); - } - else - { - RotateLeft(32, Ra, Rs, inst.SH); - AndWithMask(Ra, mask); - } - OR(32, Ra, Imm32(maskA)); + MOV(32, Ra, Rs); + SHL(32, Ra, Imm8(inst.SH)); + } + else if (right_shift) + { + MOV(32, Ra, Rs); + SHR(32, Ra, Imm8(32 - inst.SH)); } else { - // TODO: common cases of this might be faster with pinsrb or abuse of AH - RCOpArg Rs = gpr.Use(s, RCMode::Read); - RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); - RegCache::Realize(Rs, Ra); + RotateLeft(32, Ra, Rs, inst.SH); + AndWithMask(Ra, mask); + } + OR(32, Ra, Imm32(maskA)); + } + else if (inst.SH) + { + // TODO: common cases of this might be faster with pinsrb or abuse of AH + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); + RegCache::Realize(Rs, Ra); - if (isLeftShift) - { - MOV(32, R(RSCRATCH), Rs); - SHL(32, R(RSCRATCH), Imm8(inst.SH)); - AndWithMask(Ra, ~mask); - OR(32, Ra, R(RSCRATCH)); - } - else if (isRightShift) - { - MOV(32, R(RSCRATCH), Rs); - SHR(32, R(RSCRATCH), Imm8(32 - inst.SH)); - AndWithMask(Ra, ~mask); - OR(32, Ra, R(RSCRATCH)); - } - else - { - RotateLeft(32, RSCRATCH, Rs, inst.SH); - XOR(32, R(RSCRATCH), Ra); - AndWithMask(RSCRATCH, mask); - XOR(32, Ra, R(RSCRATCH)); - } + if (left_shift) + { + MOV(32, R(RSCRATCH), Rs); + SHL(32, R(RSCRATCH), Imm8(inst.SH)); + AndWithMask(Ra, ~mask); + OR(32, Ra, R(RSCRATCH)); + } + else if 
(right_shift) + { + MOV(32, R(RSCRATCH), Rs); + SHR(32, R(RSCRATCH), Imm8(32 - inst.SH)); + AndWithMask(Ra, ~mask); + OR(32, Ra, R(RSCRATCH)); + } + else + { + RotateLeft(32, RSCRATCH, Rs, inst.SH); + XOR(32, R(RSCRATCH), Ra); + AndWithMask(RSCRATCH, mask); + XOR(32, Ra, R(RSCRATCH)); } } else From 4a102186c7bf8e2e4e6a594e14f0e0dbe32cb9b2 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 28 Dec 2020 12:55:39 +0000 Subject: [PATCH 5/6] Jit_Integer: rlwimix: Handle all gpr.IsImm(a) cases --- Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index ec62bf169d..72faecef27 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1690,15 +1690,20 @@ void Jit64::rlwimix(UGeckoInstruction inst) AndWithMask(Ra, ~mask); OR(32, Ra, Imm32(Common::RotateLeft(gpr.Imm32(s), inst.SH) & mask)); } - else if (inst.SH && gpr.IsImm(a)) + else if (gpr.IsImm(a)) { - u32 maskA = gpr.Imm32(a) & ~mask; + const u32 maskA = gpr.Imm32(a) & ~mask; RCOpArg Rs = gpr.Use(s, RCMode::Read); RCX64Reg Ra = gpr.Bind(a, RCMode::Write); RegCache::Realize(Rs, Ra); - if (left_shift) + if (inst.SH == 0) + { + MOV(32, Ra, Rs); + AndWithMask(Ra, mask); + } + else if (left_shift) { MOV(32, Ra, Rs); SHL(32, Ra, Imm8(inst.SH)); @@ -1713,7 +1718,11 @@ void Jit64::rlwimix(UGeckoInstruction inst) RotateLeft(32, Ra, Rs, inst.SH); AndWithMask(Ra, mask); } - OR(32, Ra, Imm32(maskA)); + + if (maskA) + OR(32, Ra, Imm32(maskA)); + else + needs_test = true; } else if (inst.SH) { From cdcca72b61d39811d65eeb9a5f5f3af90a547483 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 28 Dec 2020 12:57:53 +0000 Subject: [PATCH 6/6] Jit_Integer: Optimize common rlwimix cases --- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 37 +++++++++++++------ 1 file changed, 26 insertions(+), 11 deletions(-) 
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 72faecef27..5bc75745f7 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1726,7 +1726,7 @@ void Jit64::rlwimix(UGeckoInstruction inst) } else if (inst.SH) { - // TODO: common cases of this might be faster with pinsrb or abuse of AH + // TODO: perhaps consider pinsrb or abuse of AH RCOpArg Rs = gpr.Use(s, RCMode::Read); RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); RegCache::Realize(Rs, Ra); @@ -1735,22 +1735,28 @@ void Jit64::rlwimix(UGeckoInstruction inst) { MOV(32, R(RSCRATCH), Rs); SHL(32, R(RSCRATCH), Imm8(inst.SH)); - AndWithMask(Ra, ~mask); - OR(32, Ra, R(RSCRATCH)); } else if (right_shift) { MOV(32, R(RSCRATCH), Rs); SHR(32, R(RSCRATCH), Imm8(32 - inst.SH)); - AndWithMask(Ra, ~mask); - OR(32, Ra, R(RSCRATCH)); } else { RotateLeft(32, RSCRATCH, Rs, inst.SH); - XOR(32, R(RSCRATCH), Ra); - AndWithMask(RSCRATCH, mask); - XOR(32, Ra, R(RSCRATCH)); + } + + if (mask == 0xFF || mask == 0xFFFF) + { + MOV(mask == 0xFF ? 8 : 16, Ra, R(RSCRATCH)); + needs_test = true; + } + else + { + if (!left_shift && !right_shift) + AndWithMask(RSCRATCH, mask); + AndWithMask(Ra, ~mask); + OR(32, Ra, R(RSCRATCH)); } } else @@ -1758,9 +1764,18 @@ void Jit64::rlwimix(UGeckoInstruction inst) RCX64Reg Rs = gpr.Bind(s, RCMode::Read); RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); RegCache::Realize(Rs, Ra); - XOR(32, Ra, Rs); - AndWithMask(Ra, ~mask); - XOR(32, Ra, Rs); + + if (mask == 0xFF || mask == 0xFFFF) + { + MOV(mask == 0xFF ? 8 : 16, Ra, Rs); + needs_test = true; + } + else + { + XOR(32, Ra, Rs); + AndWithMask(Ra, ~mask); + XOR(32, Ra, Rs); + } } if (inst.Rc) ComputeRC(a, needs_test);