From be6aec9932f6f4467052fa8a72dbeeb494201d9d Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Sun, 27 Dec 2020 20:47:53 +0000
Subject: [PATCH 1/4] Arm64Emitter: Add BFXIL

---
 Source/Core/Common/Arm64Emitter.cpp | 8 ++++++++
 Source/Core/Common/Arm64Emitter.h   | 1 +
 2 files changed, 9 insertions(+)

diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index aeee7d2891..f0451e8a48 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -1663,6 +1663,14 @@ void ARM64XEmitter::BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
              lsb, width);
   EncodeBitfieldMOVInst(1, Rd, Rn, (size - lsb) % size, width - 1);
 }
+void ARM64XEmitter::BFXIL(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
+{
+  u32 size = Is64Bit(Rn) ? 64 : 32;
+  ASSERT_MSG(DYNA_REC, lsb < size && width >= 1 && width <= size - lsb,
+             "%s passed lsb %d and width %d which is greater than the register size!", __func__,
+             lsb, width);
+  EncodeBitfieldMOVInst(1, Rd, Rn, lsb, lsb + width - 1);
+}
 void ARM64XEmitter::UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
 {
   u32 size = Is64Bit(Rn) ? 64 : 32;
diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index 246a7d86a6..db4b3ff02c 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -770,6 +770,7 @@ public:
   void SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
   void UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
   void BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);
+  void BFXIL(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);
   void UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);
 
   // Extract register (ROR with two inputs, if same then faster on A67)

From 75d92ad6280f6e37bb59cc4feb97e0bf34edb756 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Sun, 31 Jan 2021 11:48:39 +0000
Subject: [PATCH 2/4] Arm64Emitter: Prefer BFM/UBFM to EncodeBitfieldMOVInst

---
 Source/Core/Common/Arm64Emitter.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index f0451e8a48..cad26bbb4f 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -1661,7 +1661,7 @@ void ARM64XEmitter::BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
   ASSERT_MSG(DYNA_REC, (lsb + width) <= size,
              "%s passed lsb %d and width %d which is greater than the register size!", __func__,
              lsb, width);
-  EncodeBitfieldMOVInst(1, Rd, Rn, (size - lsb) % size, width - 1);
+  BFM(Rd, Rn, (size - lsb) % size, width - 1);
 }
 void ARM64XEmitter::BFXIL(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
 {
@@ -1669,7 +1669,7 @@ void ARM64XEmitter::BFXIL(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
   ASSERT_MSG(DYNA_REC, lsb < size && width >= 1 && width <= size - lsb,
              "%s passed lsb %d and width %d which is greater than the register size!", __func__,
              lsb, width);
-  EncodeBitfieldMOVInst(1, Rd, Rn, lsb, lsb + width - 1);
+  BFM(Rd, Rn, lsb, lsb + width - 1);
 }
 void ARM64XEmitter::UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
 {
@@ -1677,7 +1677,7 @@ void ARM64XEmitter::UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
   ASSERT_MSG(DYNA_REC, (lsb + width) <= size,
              "%s passed lsb %d and width %d which is greater than the register size!", __func__,
              lsb, width);
-  EncodeBitfieldMOVInst(2, Rd, Rn, (size - lsb) % size, width - 1);
+  UBFM(Rd, Rn, (size - lsb) % size, width - 1);
 }
 void ARM64XEmitter::EXTR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 shift)
 {
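Note on BFXIL semantics (annotation, not part of the series): BFXIL, "bitfield extract and insert low", is the alias of BFM with immr = lsb and imms = lsb + width - 1, which matches the EncodeBitfieldMOVInst call added in patch 1. It copies the width-bit field starting at bit lsb of Rn into the least significant bits of Rd and leaves the remaining bits of Rd unchanged. A minimal C++ model of the 32-bit case; the helper name is invented for illustration:

  #include <cstdint>

  // Models BFXIL Wd, Wn, #lsb, #width. Valid for 1 <= width <= 32 - lsb,
  // the exact range accepted by the assertion in the patch.
  static uint32_t ModelBFXIL32(uint32_t rd, uint32_t rn, uint32_t lsb, uint32_t width)
  {
    // width == 32 is special-cased: shifting a 32-bit value by 32 is UB in C++.
    const uint32_t mask = (width == 32) ? 0xFFFFFFFFu : ((1u << width) - 1u);
    const uint32_t field = (rn >> lsb) & mask;  // extract Rn<lsb+width-1:lsb>
    return (rd & ~mask) | field;                // insert at bit 0 of Rd
  }

For example, ModelBFXIL32(0xAABBCCDD, 0x12345678, 8, 16) returns 0xAABB3456.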
From 8aa2013a2d5168338cae888a4c0a766cb8457240 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Sun, 31 Jan 2021 11:50:34 +0000
Subject: [PATCH 3/4] Arm64Emitter: Add additional assertions to BFI/UBFIZ

---
 Source/Core/Common/Arm64Emitter.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index cad26bbb4f..c3cb492a5f 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -1658,7 +1658,7 @@ void ARM64XEmitter::UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)
 void ARM64XEmitter::BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
 {
   u32 size = Is64Bit(Rn) ? 64 : 32;
-  ASSERT_MSG(DYNA_REC, (lsb + width) <= size,
+  ASSERT_MSG(DYNA_REC, lsb < size && width >= 1 && width <= size - lsb,
              "%s passed lsb %d and width %d which is greater than the register size!", __func__,
              lsb, width);
   BFM(Rd, Rn, (size - lsb) % size, width - 1);
@@ -1674,7 +1674,7 @@
 void ARM64XEmitter::UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
 {
   u32 size = Is64Bit(Rn) ? 64 : 32;
-  ASSERT_MSG(DYNA_REC, (lsb + width) <= size,
+  ASSERT_MSG(DYNA_REC, lsb < size && width >= 1 && width <= size - lsb,
              "%s passed lsb %d and width %d which is greater than the register size!", __func__,
              lsb, width);
   UBFM(Rd, Rn, (size - lsb) % size, width - 1);
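Note on the tightened assertions (annotation, not part of the series): the old condition (lsb + width) <= size accepted width == 0, in which case the width - 1 used to form the imms field wraps around to 0xFFFFFFFF and encodes garbage; an out-of-range lsb can likewise slip through via unsigned wraparound of lsb + width. The new condition rejects both. Side by side as standalone predicates (illustrative helpers, not Dolphin code):

  #include <cstdint>

  // Old check: width == 0 passes, then `width - 1` wraps in the encoder;
  // lsb + width can also wrap, e.g. lsb = 0xFFFFFFFF, width = 1 gives 0.
  static bool OldBitfieldArgsOk(uint32_t size, uint32_t lsb, uint32_t width)
  {
    return (lsb + width) <= size;
  }

  // New check: the field is non-empty, starts inside the register, and fits.
  static bool NewBitfieldArgsOk(uint32_t size, uint32_t lsb, uint32_t width)
  {
    return lsb < size && width >= 1 && width <= size - lsb;
  }

For example, OldBitfieldArgsOk(32, 5, 0) is true although a zero-width insert is meaningless, while NewBitfieldArgsOk(32, 5, 0) is false.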
From a0b8956f22db39c59c3f8c1439086304423f7997 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Sun, 27 Dec 2020 20:23:03 +0000
Subject: [PATCH 4/4] JitArm64_Integer: Add optimizations for rlwimix

* Check for case when source field is at LSB
* Use BFXIL if possible
* Avoid ROR where possible
---
 .../PowerPC/JitArm64/JitArm64_Integer.cpp | 38 ++++++++++++-------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
index 1ce0377ac1..bd806275bf 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
@@ -1443,6 +1443,10 @@ void JitArm64::rlwimix(UGeckoInstruction inst)
   const int a = inst.RA, s = inst.RS;
   const u32 mask = MakeRotationMask(inst.MB, inst.ME);
 
+  const u32 lsb = 31 - inst.ME;
+  const u32 width = inst.ME - inst.MB + 1;
+  const u32 rot_dist = inst.SH ? 32 - inst.SH : 0;
+
   if (gpr.IsImm(a) && gpr.IsImm(s))
   {
     u32 res = (gpr.GetImm(a) & ~mask) | (Common::RotateLeft(gpr.GetImm(s), inst.SH) & mask);
@@ -1462,17 +1466,22 @@ void JitArm64::rlwimix(UGeckoInstruction inst)
     gpr.BindToRegister(a, a == s);
 
     if (inst.SH)
-      ROR(gpr.R(a), gpr.R(s), 32 - inst.SH);
+      ROR(gpr.R(a), gpr.R(s), rot_dist);
     else if (a != s)
       MOV(gpr.R(a), gpr.R(s));
   }
+  else if (lsb == 0 && inst.MB <= inst.ME && rot_dist + width <= 32)
+  {
+    // Destination is in least significant position
+    // No mask inversion
+    // Source field pre-rotation is contiguous
+    gpr.BindToRegister(a, true);
+    BFXIL(gpr.R(a), gpr.R(s), rot_dist, width);
+  }
   else if (inst.SH == 0 && inst.MB <= inst.ME)
   {
     // No rotation
     // No mask inversion
-    u32 lsb = 31 - inst.ME;
-    u32 width = inst.ME - inst.MB + 1;
-
     gpr.BindToRegister(a, true);
     ARM64Reg WA = gpr.GetReg();
     UBFX(WA, gpr.R(s), lsb, width);
@@ -1482,15 +1491,18 @@ void JitArm64::rlwimix(UGeckoInstruction inst)
   else if (inst.SH && inst.MB <= inst.ME)
   {
     // No mask inversion
-    u32 lsb = 31 - inst.ME;
-    u32 width = inst.ME - inst.MB + 1;
-
     gpr.BindToRegister(a, true);
-    ARM64Reg WA = gpr.GetReg();
-    ROR(WA, gpr.R(s), 32 - inst.SH);
-    UBFX(WA, WA, lsb, width);
-    BFI(gpr.R(a), WA, lsb, width);
-    gpr.Unlock(WA);
+    if ((rot_dist + lsb) % 32 == 0)
+    {
+      BFI(gpr.R(a), gpr.R(s), lsb, width);
+    }
+    else
+    {
+      ARM64Reg WA = gpr.GetReg();
+      ROR(WA, gpr.R(s), (rot_dist + lsb) % 32);
+      BFI(gpr.R(a), WA, lsb, width);
+      gpr.Unlock(WA);
+    }
   }
   else
   {
@@ -1500,7 +1512,7 @@ void JitArm64::rlwimix(UGeckoInstruction inst)
 
     MOVI2R(WA, mask);
     BIC(WB, gpr.R(a), WA);
-    AND(WA, WA, gpr.R(s), ArithOption(gpr.R(s), ShiftType::ROR, 32 - inst.SH));
+    AND(WA, WA, gpr.R(s), ArithOption(gpr.R(s), ShiftType::ROR, rot_dist));
     ORR(gpr.R(a), WB, WA);
 
     gpr.Unlock(WA, WB);
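Note on the rlwimix mapping (annotation, not part of the series): rlwimi computes rA = (rotl32(rS, SH) & mask) | (rA & ~mask), with mask covering PowerPC bits MB..ME, where bit 0 is the MSB. Hence lsb = 31 - ME is the field position in ARM terms, width = ME - MB + 1 is its size, and rot_dist = 32 - SH converts PowerPC's rotate-left into the equivalent rotate-right. A reference model in C++ (hedged sketch: helper names are invented, and the mask expression only covers the non-wrapping MB <= ME case that the fast paths require; MakeRotationMask also handles wrapping masks):

  #include <cstdint>

  static uint32_t RotL32(uint32_t x, uint32_t n)
  {
    return n ? (x << n) | (x >> (32 - n)) : x;  // guard the UB shift by 32
  }

  // Reference semantics of rlwimi rA, rS, SH, MB, ME for MB <= ME.
  static uint32_t ModelRlwimi(uint32_t ra, uint32_t rs, uint32_t sh, uint32_t mb, uint32_t me)
  {
    // PowerPC bits mb..me are LSB-0 bits (31 - me)..(31 - mb).
    const uint32_t mask = (0xFFFFFFFFu >> mb) & (0xFFFFFFFFu << (31 - me));
    return (ra & ~mask) | (RotL32(rs, sh) & mask);
  }

When ME == 31 (lsb == 0) the inserted field sits at bit 0 of rA, and when rot_dist + width <= 32 the source bits rS<rot_dist+width-1:rot_dist> are contiguous, so the whole operation collapses to the single BFXIL(rA, rS, rot_dist, width) emitted by the new first fast path. The later BFI paths fold the rotation and field alignment into one ROR, or skip the ROR entirely when (rot_dist + lsb) % 32 == 0.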