mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-04-24 14:24:54 +00:00
Initial Change For Fixes
Matches hardware in rounding paired singles after move operations! Move operations are operations which only transfer bits directly. This also accounts for ps_rsqrte, because it has a similar quirk. Specifically, in hardware they're rounded according to their slot: - PS0 rounds *only the mantissa* according to the set rounding mode - PS1 truncates the mantissa. ps_rsqrte also truncates PS0 for some reason ^^; This has all been tested on hardware, along with a few edge case tests. Co-Authored-By: JosJuice <josjuice@gmail.com>
This commit is contained in:
parent
5fe9e2f6ed
commit
ae6a88dc50
1 changed files with 84 additions and 17 deletions
|
@ -10,6 +10,66 @@
|
|||
#include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h"
|
||||
#include "Core/PowerPC/PowerPC.h"
|
||||
|
||||
// Instructions which move data without performing operations round a bit weirdly
|
||||
// Specifically, they round the mantissa to be like that of a 32-bit float,
|
||||
// going as far as to focus on the rounding mode, but never actually care about
|
||||
// making sure the exponent becomes 32-bit
|
||||
// Either this, or they'll truncate the mantissa down, which will always happen to
|
||||
// PS1 OR PS0 in ps_rsqrte
|
||||
// Clears the low mantissa bits of a double's bit pattern that have no
// counterpart in a single precision mantissa.
//
// bits: raw bit pattern of a double.
// Returns the same pattern with the excess low mantissa bits zeroed; all
// higher bits are passed through unchanged.
inline u64 TruncateMantissaBits(u64 bits)
{
  // Number of double mantissa bits that do not fit in a float mantissa
  constexpr u64 remove_bits = Common::DOUBLE_FRAC_WIDTH - Common::FLOAT_FRAC_WIDTH;

  // Shift a u64 rather than an int literal so the mask is computed at full
  // width regardless of how many bits have to be removed
  constexpr u64 remove_mask = (u64{1} << remove_bits) - 1;

  return bits & ~remove_mask;
}
|
||||
|
||||
inline double TruncateMantissa(double value)
|
||||
{
|
||||
u64 bits = std::bit_cast<u64>(value);
|
||||
u64 trunc_bits = TruncateMantissaBits(bits);
|
||||
return std::bit_cast<double>(trunc_bits);
|
||||
}
|
||||
|
||||
// Rounds only the mantissa of a double's bit pattern down to single precision
// width, keeping the double's own exponent.
//
// bits: raw bit pattern of the double to round.
// Returns the bit pattern with the mantissa rounded by the host's
// double -> float conversion. Values with all exponent bits set (infinities and
// NaNs) have their mantissa truncated instead.
inline u64 RoundMantissaBits(u64 bits)
{
  // Checking if the value is non-finite
  if ((bits & Common::DOUBLE_EXP) == Common::DOUBLE_EXP)
  {
    // For infinite and NaN values, the mantissa is simply truncated
    return TruncateMantissaBits(bits);
  }

  // Temporary exponent substituted in while rounding, so that the conversion to
  // float operates on the mantissa without the original exponent interfering
  constexpr u64 replacement_exp = 0x4000000000000000ull;

  // To round only the mantissa, we assume the CPU can change the rounding mode,
  // create a new double with an exponent that won't cause issues, round to a
  // single, and convert back to a double while restoring the original exponent.
  const u64 resized_bits = (bits & (Common::DOUBLE_FRAC | Common::DOUBLE_SIGN)) | replacement_exp;

  const float rounded_float = static_cast<float>(std::bit_cast<double>(resized_bits));
  const double extended_float = static_cast<double>(rounded_float);
  const u64 rounded_bits = std::bit_cast<u64>(extended_float);

  // Removing the temporary exponent is done via subtraction instead of bitwise
  // operations, because rounding up can carry out of the mantissa into the
  // lowest exponent bit. For the same reason the original exponent has to be
  // added back rather than ORed in: an OR would swallow that carry whenever the
  // original exponent is odd (e.g. 0x3FFFFFFFFFFFFFFF would become 1.0, not 2.0).
  // The sum cannot spill into the sign bit, since an all-ones exponent was
  // already handled above.
  const u64 orig_exp_bits = bits & Common::DOUBLE_EXP;
  return (rounded_bits - replacement_exp) + orig_exp_bits;
}
|
||||
|
||||
inline double RoundMantissa(double value)
|
||||
{
|
||||
// The double version of the function just converts to and from bits again
|
||||
// This would be a necessary step anyways, so it just simplifies code
|
||||
u64 bits = std::bit_cast<u64>(value);
|
||||
u64 rounded_bits = RoundMantissaBits(bits);
|
||||
return std::bit_cast<double>(rounded_bits);
|
||||
}
|
||||
|
||||
// These "binary instructions" do not alter FPSCR.
|
||||
void Interpreter::ps_sel(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
{
|
||||
|
@ -18,8 +78,9 @@ void Interpreter::ps_sel(Interpreter& interpreter, UGeckoInstruction inst)
|
|||
const auto& b = ppc_state.ps[inst.FB];
|
||||
const auto& c = ppc_state.ps[inst.FC];
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(a.PS0AsDouble() >= -0.0 ? c.PS0AsDouble() : b.PS0AsDouble(),
|
||||
a.PS1AsDouble() >= -0.0 ? c.PS1AsDouble() : b.PS1AsDouble());
|
||||
double ps0 = a.PS0AsDouble() >= -0.0 ? c.PS0AsDouble() : b.PS0AsDouble();
|
||||
double ps1 = a.PS1AsDouble() >= -0.0 ? c.PS1AsDouble() : b.PS1AsDouble();
|
||||
ppc_state.ps[inst.FD].SetBoth(RoundMantissa(ps0), TruncateMantissa(ps1));
|
||||
|
||||
if (inst.Rc)
|
||||
ppc_state.UpdateCR1();
|
||||
|
@ -30,8 +91,9 @@ void Interpreter::ps_neg(Interpreter& interpreter, UGeckoInstruction inst)
|
|||
auto& ppc_state = interpreter.m_ppc_state;
|
||||
const auto& b = ppc_state.ps[inst.FB];
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(b.PS0AsU64() ^ (UINT64_C(1) << 63),
|
||||
b.PS1AsU64() ^ (UINT64_C(1) << 63));
|
||||
u64 ps0 = b.PS0AsU64() ^ (UINT64_C(1) << 63);
|
||||
u64 ps1 = b.PS1AsU64() ^ (UINT64_C(1) << 63);
|
||||
ppc_state.ps[inst.FD].SetBoth(RoundMantissaBits(ps0), TruncateMantissaBits(ps1));
|
||||
|
||||
if (inst.Rc)
|
||||
ppc_state.UpdateCR1();
|
||||
|
@ -40,7 +102,9 @@ void Interpreter::ps_neg(Interpreter& interpreter, UGeckoInstruction inst)
|
|||
void Interpreter::ps_mr(Interpreter& interpreter, UGeckoInstruction inst)
|
||||
{
|
||||
auto& ppc_state = interpreter.m_ppc_state;
|
||||
ppc_state.ps[inst.FD] = ppc_state.ps[inst.FB];
|
||||
const auto& b = ppc_state.ps[inst.FB];
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(RoundMantissa(b.PS0AsDouble()), TruncateMantissa(b.PS1AsDouble()));
|
||||
|
||||
if (inst.Rc)
|
||||
ppc_state.UpdateCR1();
|
||||
|
@ -51,8 +115,9 @@ void Interpreter::ps_nabs(Interpreter& interpreter, UGeckoInstruction inst)
|
|||
auto& ppc_state = interpreter.m_ppc_state;
|
||||
const auto& b = ppc_state.ps[inst.FB];
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(b.PS0AsU64() | (UINT64_C(1) << 63),
|
||||
b.PS1AsU64() | (UINT64_C(1) << 63));
|
||||
u64 ps0 = b.PS0AsU64() | (UINT64_C(1) << 63);
|
||||
u64 ps1 = b.PS1AsU64() | (UINT64_C(1) << 63);
|
||||
ppc_state.ps[inst.FD].SetBoth(RoundMantissaBits(ps0), TruncateMantissaBits(ps1));
|
||||
|
||||
if (inst.Rc)
|
||||
ppc_state.UpdateCR1();
|
||||
|
@ -63,8 +128,9 @@ void Interpreter::ps_abs(Interpreter& interpreter, UGeckoInstruction inst)
|
|||
auto& ppc_state = interpreter.m_ppc_state;
|
||||
const auto& b = ppc_state.ps[inst.FB];
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(b.PS0AsU64() & ~(UINT64_C(1) << 63),
|
||||
b.PS1AsU64() & ~(UINT64_C(1) << 63));
|
||||
u64 ps0 = b.PS0AsU64() & ~(UINT64_C(1) << 63);
|
||||
u64 ps1 = b.PS1AsU64() & ~(UINT64_C(1) << 63);
|
||||
ppc_state.ps[inst.FD].SetBoth(RoundMantissaBits(ps0), TruncateMantissaBits(ps1));
|
||||
|
||||
if (inst.Rc)
|
||||
ppc_state.UpdateCR1();
|
||||
|
@ -77,7 +143,7 @@ void Interpreter::ps_merge00(Interpreter& interpreter, UGeckoInstruction inst)
|
|||
const auto& a = ppc_state.ps[inst.FA];
|
||||
const auto& b = ppc_state.ps[inst.FB];
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(a.PS0AsDouble(), b.PS0AsDouble());
|
||||
ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS0AsDouble()), TruncateMantissa(b.PS0AsDouble()));
|
||||
|
||||
if (inst.Rc)
|
||||
ppc_state.UpdateCR1();
|
||||
|
@ -89,7 +155,7 @@ void Interpreter::ps_merge01(Interpreter& interpreter, UGeckoInstruction inst)
|
|||
const auto& a = ppc_state.ps[inst.FA];
|
||||
const auto& b = ppc_state.ps[inst.FB];
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(a.PS0AsDouble(), b.PS1AsDouble());
|
||||
ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS0AsDouble()), TruncateMantissa(b.PS1AsDouble()));
|
||||
|
||||
if (inst.Rc)
|
||||
ppc_state.UpdateCR1();
|
||||
|
@ -101,7 +167,7 @@ void Interpreter::ps_merge10(Interpreter& interpreter, UGeckoInstruction inst)
|
|||
const auto& a = ppc_state.ps[inst.FA];
|
||||
const auto& b = ppc_state.ps[inst.FB];
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(a.PS1AsDouble(), b.PS0AsDouble());
|
||||
ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS1AsDouble()), TruncateMantissa(b.PS0AsDouble()));
|
||||
|
||||
if (inst.Rc)
|
||||
ppc_state.UpdateCR1();
|
||||
|
@ -113,7 +179,7 @@ void Interpreter::ps_merge11(Interpreter& interpreter, UGeckoInstruction inst)
|
|||
const auto& a = ppc_state.ps[inst.FA];
|
||||
const auto& b = ppc_state.ps[inst.FB];
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(a.PS1AsDouble(), b.PS1AsDouble());
|
||||
ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS1AsDouble()), TruncateMantissa(b.PS1AsDouble()));
|
||||
|
||||
if (inst.Rc)
|
||||
ppc_state.UpdateCR1();
|
||||
|
@ -191,8 +257,9 @@ void Interpreter::ps_rsqrte(Interpreter& interpreter, UGeckoInstruction inst)
|
|||
if (Common::IsSNAN(ps0) || Common::IsSNAN(ps1))
|
||||
SetFPException(ppc_state, FPSCR_VXSNAN);
|
||||
|
||||
const float dst_ps0 = ForceSingle(ppc_state.fpscr, Common::ApproximateReciprocalSquareRoot(ps0));
|
||||
const float dst_ps1 = ForceSingle(ppc_state.fpscr, Common::ApproximateReciprocalSquareRoot(ps1));
|
||||
// For some reason ps0 is also truncated for this operation rather than rounded
|
||||
const double dst_ps0 = TruncateMantissa(Common::ApproximateReciprocalSquareRoot(ps0));
|
||||
const double dst_ps1 = TruncateMantissa(Common::ApproximateReciprocalSquareRoot(ps1));
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(dst_ps0, dst_ps1);
|
||||
ppc_state.UpdateFPRFSingle(dst_ps0);
|
||||
|
@ -359,7 +426,7 @@ void Interpreter::ps_sum0(Interpreter& interpreter, UGeckoInstruction inst)
|
|||
|
||||
const float ps0 =
|
||||
ForceSingle(ppc_state.fpscr, NI_add(ppc_state, a.PS0AsDouble(), b.PS1AsDouble()).value);
|
||||
const float ps1 = ForceSingle(ppc_state.fpscr, c.PS1AsDouble());
|
||||
const double ps1 = TruncateMantissa(c.PS1AsDouble());
|
||||
|
||||
ppc_state.ps[inst.FD].SetBoth(ps0, ps1);
|
||||
ppc_state.UpdateFPRFSingle(ps0);
|
||||
|
@ -375,7 +442,7 @@ void Interpreter::ps_sum1(Interpreter& interpreter, UGeckoInstruction inst)
|
|||
const auto& b = ppc_state.ps[inst.FB];
|
||||
const auto& c = ppc_state.ps[inst.FC];
|
||||
|
||||
const float ps0 = ForceSingle(ppc_state.fpscr, c.PS0AsDouble());
|
||||
const double ps0 = RoundMantissa(c.PS0AsDouble());
|
||||
const float ps1 =
|
||||
ForceSingle(ppc_state.fpscr, NI_add(ppc_state, a.PS0AsDouble(), b.PS1AsDouble()).value);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue