mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-04-25 06:44:59 +00:00
Merge pull request #9373 from MerryMage/arm64-rlwimix
JitArm64_Integer: Add optimizations for rlwimix
This commit is contained in:
commit
27b7e5891d
3 changed files with 38 additions and 17 deletions
|
@ -1658,18 +1658,26 @@ void ARM64XEmitter::UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)
|
||||||
void ARM64XEmitter::BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
|
void ARM64XEmitter::BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
|
||||||
{
|
{
|
||||||
u32 size = Is64Bit(Rn) ? 64 : 32;
|
u32 size = Is64Bit(Rn) ? 64 : 32;
|
||||||
ASSERT_MSG(DYNA_REC, (lsb + width) <= size,
|
ASSERT_MSG(DYNA_REC, lsb < size && width >= 1 && width <= size - lsb,
|
||||||
"%s passed lsb %d and width %d which is greater than the register size!", __func__,
|
"%s passed lsb %d and width %d which is greater than the register size!", __func__,
|
||||||
lsb, width);
|
lsb, width);
|
||||||
EncodeBitfieldMOVInst(1, Rd, Rn, (size - lsb) % size, width - 1);
|
BFM(Rd, Rn, (size - lsb) % size, width - 1);
|
||||||
|
}
|
||||||
|
void ARM64XEmitter::BFXIL(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
|
||||||
|
{
|
||||||
|
u32 size = Is64Bit(Rn) ? 64 : 32;
|
||||||
|
ASSERT_MSG(DYNA_REC, lsb < size && width >= 1 && width <= size - lsb,
|
||||||
|
"%s passed lsb %d and width %d which is greater than the register size!", __func__,
|
||||||
|
lsb, width);
|
||||||
|
BFM(Rd, Rn, lsb, lsb + width - 1);
|
||||||
}
|
}
|
||||||
void ARM64XEmitter::UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
|
void ARM64XEmitter::UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
|
||||||
{
|
{
|
||||||
u32 size = Is64Bit(Rn) ? 64 : 32;
|
u32 size = Is64Bit(Rn) ? 64 : 32;
|
||||||
ASSERT_MSG(DYNA_REC, (lsb + width) <= size,
|
ASSERT_MSG(DYNA_REC, lsb < size && width >= 1 && width <= size - lsb,
|
||||||
"%s passed lsb %d and width %d which is greater than the register size!", __func__,
|
"%s passed lsb %d and width %d which is greater than the register size!", __func__,
|
||||||
lsb, width);
|
lsb, width);
|
||||||
EncodeBitfieldMOVInst(2, Rd, Rn, (size - lsb) % size, width - 1);
|
UBFM(Rd, Rn, (size - lsb) % size, width - 1);
|
||||||
}
|
}
|
||||||
void ARM64XEmitter::EXTR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 shift)
|
void ARM64XEmitter::EXTR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 shift)
|
||||||
{
|
{
|
||||||
|
|
|
@ -770,6 +770,7 @@ public:
|
||||||
void SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
|
void SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
|
||||||
void UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
|
void UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
|
||||||
void BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);
|
void BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);
|
||||||
|
void BFXIL(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);
|
||||||
void UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);
|
void UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);
|
||||||
|
|
||||||
// Extract register (ROR with two inputs, if same then faster on A57)
|
// Extract register (ROR with two inputs, if same then faster on A57)
|
||||||
|
|
|
@ -1443,6 +1443,10 @@ void JitArm64::rlwimix(UGeckoInstruction inst)
|
||||||
const int a = inst.RA, s = inst.RS;
|
const int a = inst.RA, s = inst.RS;
|
||||||
const u32 mask = MakeRotationMask(inst.MB, inst.ME);
|
const u32 mask = MakeRotationMask(inst.MB, inst.ME);
|
||||||
|
|
||||||
|
const u32 lsb = 31 - inst.ME;
|
||||||
|
const u32 width = inst.ME - inst.MB + 1;
|
||||||
|
const u32 rot_dist = inst.SH ? 32 - inst.SH : 0;
|
||||||
|
|
||||||
if (gpr.IsImm(a) && gpr.IsImm(s))
|
if (gpr.IsImm(a) && gpr.IsImm(s))
|
||||||
{
|
{
|
||||||
u32 res = (gpr.GetImm(a) & ~mask) | (Common::RotateLeft(gpr.GetImm(s), inst.SH) & mask);
|
u32 res = (gpr.GetImm(a) & ~mask) | (Common::RotateLeft(gpr.GetImm(s), inst.SH) & mask);
|
||||||
|
@ -1462,17 +1466,22 @@ void JitArm64::rlwimix(UGeckoInstruction inst)
|
||||||
gpr.BindToRegister(a, a == s);
|
gpr.BindToRegister(a, a == s);
|
||||||
|
|
||||||
if (inst.SH)
|
if (inst.SH)
|
||||||
ROR(gpr.R(a), gpr.R(s), 32 - inst.SH);
|
ROR(gpr.R(a), gpr.R(s), rot_dist);
|
||||||
else if (a != s)
|
else if (a != s)
|
||||||
MOV(gpr.R(a), gpr.R(s));
|
MOV(gpr.R(a), gpr.R(s));
|
||||||
}
|
}
|
||||||
|
else if (lsb == 0 && inst.MB <= inst.ME && rot_dist + width <= 32)
|
||||||
|
{
|
||||||
|
// Destination is in least significant position
|
||||||
|
// No mask inversion
|
||||||
|
// Source field pre-rotation is contiguous
|
||||||
|
gpr.BindToRegister(a, true);
|
||||||
|
BFXIL(gpr.R(a), gpr.R(s), rot_dist, width);
|
||||||
|
}
|
||||||
else if (inst.SH == 0 && inst.MB <= inst.ME)
|
else if (inst.SH == 0 && inst.MB <= inst.ME)
|
||||||
{
|
{
|
||||||
// No rotation
|
// No rotation
|
||||||
// No mask inversion
|
// No mask inversion
|
||||||
u32 lsb = 31 - inst.ME;
|
|
||||||
u32 width = inst.ME - inst.MB + 1;
|
|
||||||
|
|
||||||
gpr.BindToRegister(a, true);
|
gpr.BindToRegister(a, true);
|
||||||
ARM64Reg WA = gpr.GetReg();
|
ARM64Reg WA = gpr.GetReg();
|
||||||
UBFX(WA, gpr.R(s), lsb, width);
|
UBFX(WA, gpr.R(s), lsb, width);
|
||||||
|
@ -1482,15 +1491,18 @@ void JitArm64::rlwimix(UGeckoInstruction inst)
|
||||||
else if (inst.SH && inst.MB <= inst.ME)
|
else if (inst.SH && inst.MB <= inst.ME)
|
||||||
{
|
{
|
||||||
// No mask inversion
|
// No mask inversion
|
||||||
u32 lsb = 31 - inst.ME;
|
|
||||||
u32 width = inst.ME - inst.MB + 1;
|
|
||||||
|
|
||||||
gpr.BindToRegister(a, true);
|
gpr.BindToRegister(a, true);
|
||||||
ARM64Reg WA = gpr.GetReg();
|
if ((rot_dist + lsb) % 32 == 0)
|
||||||
ROR(WA, gpr.R(s), 32 - inst.SH);
|
{
|
||||||
UBFX(WA, WA, lsb, width);
|
BFI(gpr.R(a), gpr.R(s), lsb, width);
|
||||||
BFI(gpr.R(a), WA, lsb, width);
|
}
|
||||||
gpr.Unlock(WA);
|
else
|
||||||
|
{
|
||||||
|
ARM64Reg WA = gpr.GetReg();
|
||||||
|
ROR(WA, gpr.R(s), (rot_dist + lsb) % 32);
|
||||||
|
BFI(gpr.R(a), WA, lsb, width);
|
||||||
|
gpr.Unlock(WA);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1500,7 +1512,7 @@ void JitArm64::rlwimix(UGeckoInstruction inst)
|
||||||
|
|
||||||
MOVI2R(WA, mask);
|
MOVI2R(WA, mask);
|
||||||
BIC(WB, gpr.R(a), WA);
|
BIC(WB, gpr.R(a), WA);
|
||||||
AND(WA, WA, gpr.R(s), ArithOption(gpr.R(s), ShiftType::ROR, 32 - inst.SH));
|
AND(WA, WA, gpr.R(s), ArithOption(gpr.R(s), ShiftType::ROR, rot_dist));
|
||||||
ORR(gpr.R(a), WB, WA);
|
ORR(gpr.R(a), WB, WA);
|
||||||
|
|
||||||
gpr.Unlock(WA, WB);
|
gpr.Unlock(WA, WB);
|
||||||
|
|
Loading…
Add table
Reference in a new issue