Core: Move CountPerformanceMarker to VideoInterface to eliminate a Throttle call. PerformanceMetrics: Fixes/Cleanups.

This commit is contained in:
Jordan Woyak 2025-03-07 19:26:47 -06:00
parent 61ab662733
commit c42dab6388
6 changed files with 52 additions and 50 deletions

View file

@ -457,6 +457,8 @@ void CoreTimingManager::LogPendingEvents() const
// Should only be called from the CPU thread after the PPC clock has changed // Should only be called from the CPU thread after the PPC clock has changed
void CoreTimingManager::AdjustEventQueueTimes(u32 new_ppc_clock, u32 old_ppc_clock) void CoreTimingManager::AdjustEventQueueTimes(u32 new_ppc_clock, u32 old_ppc_clock)
{ {
g_perf_metrics.AdjustClockSpeed(m_globals.global_timer, new_ppc_clock, old_ppc_clock);
m_throttle_clock_per_sec = new_ppc_clock; m_throttle_clock_per_sec = new_ppc_clock;
for (Event& ev : m_event_queue) for (Event& ev : m_event_queue)

View file

@ -50,8 +50,6 @@ IPC_HLE_PERIOD: For the Wii Remote this is the call schedule:
#include "AudioCommon/Mixer.h" #include "AudioCommon/Mixer.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/Logging/Log.h"
#include "Common/Thread.h"
#include "Common/Timer.h" #include "Common/Timer.h"
#include "Core/Config/MainSettings.h" #include "Core/Config/MainSettings.h"
#include "Core/Core.h" #include "Core/Core.h"
@ -122,21 +120,6 @@ void SystemTimersManager::GPUSleepCallback(Core::System& system, u64 userdata, s
system_timers.m_event_type_gpu_sleeper); system_timers.m_event_type_gpu_sleeper);
} }
// Periodic event callback that throttles emulation, records one performance
// sample in g_perf_metrics, and then schedules itself again.
// `userdata` is not used in the body. `cycles_late` is subtracted both from the
// tick count handed to Throttle and from the delay of the next scheduled event
// (presumably so lateness does not accumulate between calls; confirm against CoreTiming).
void SystemTimersManager::PerfTrackerCallback(Core::System& system, u64 userdata, s64 cycles_late)
{
auto& core_timing = system.GetCoreTiming();
// Throttle for accurate performance metrics.
core_timing.Throttle(core_timing.GetTicks() - cycles_late);
g_perf_metrics.CountPerformanceMarker(system, cycles_late);
// Call this performance tracker again in 1/100th of a second.
// The tracker stores 256 values so this will let us summarize the last 2.56 seconds.
// The performance metrics require this to be called at 100 Hz for the speed% to be correct.
auto& system_timers = system.GetSystemTimers();
core_timing.ScheduleEvent(system_timers.GetTicksPerSecond() / 100 - cycles_late,
system_timers.m_event_type_perf_tracker);
}
void SystemTimersManager::VICallback(Core::System& system, u64 userdata, s64 cycles_late) void SystemTimersManager::VICallback(Core::System& system, u64 userdata, s64 cycles_late)
{ {
auto& core_timing = system.GetCoreTiming(); auto& core_timing = system.GetCoreTiming();
@ -293,10 +276,8 @@ void SystemTimersManager::Init()
m_event_type_ipc_hle = m_event_type_ipc_hle =
core_timing.RegisterEvent("IPC_HLE_UpdateCallback", IPC_HLE_UpdateCallback); core_timing.RegisterEvent("IPC_HLE_UpdateCallback", IPC_HLE_UpdateCallback);
m_event_type_gpu_sleeper = core_timing.RegisterEvent("GPUSleeper", GPUSleepCallback); m_event_type_gpu_sleeper = core_timing.RegisterEvent("GPUSleeper", GPUSleepCallback);
m_event_type_perf_tracker = core_timing.RegisterEvent("PerfTracker", PerfTrackerCallback);
m_event_type_patch_engine = core_timing.RegisterEvent("PatchEngine", PatchEngineCallback); m_event_type_patch_engine = core_timing.RegisterEvent("PatchEngine", PatchEngineCallback);
core_timing.ScheduleEvent(0, m_event_type_perf_tracker);
core_timing.ScheduleEvent(0, m_event_type_gpu_sleeper); core_timing.ScheduleEvent(0, m_event_type_gpu_sleeper);
core_timing.ScheduleEvent(vi.GetTicksPerHalfLine(), m_event_type_vi); core_timing.ScheduleEvent(vi.GetTicksPerHalfLine(), m_event_type_vi);
core_timing.ScheduleEvent(0, m_event_type_dsp); core_timing.ScheduleEvent(0, m_event_type_dsp);

View file

@ -94,7 +94,6 @@ private:
static void AudioDMACallback(Core::System& system, u64 userdata, s64 cycles_late); static void AudioDMACallback(Core::System& system, u64 userdata, s64 cycles_late);
static void IPC_HLE_UpdateCallback(Core::System& system, u64 userdata, s64 cycles_late); static void IPC_HLE_UpdateCallback(Core::System& system, u64 userdata, s64 cycles_late);
static void GPUSleepCallback(Core::System& system, u64 userdata, s64 cycles_late); static void GPUSleepCallback(Core::System& system, u64 userdata, s64 cycles_late);
static void PerfTrackerCallback(Core::System& system, u64 userdata, s64 cycles_late);
static void VICallback(Core::System& system, u64 userdata, s64 cycles_late); static void VICallback(Core::System& system, u64 userdata, s64 cycles_late);
static void DecrementerCallback(Core::System& system, u64 userdata, s64 cycles_late); static void DecrementerCallback(Core::System& system, u64 userdata, s64 cycles_late);
static void PatchEngineCallback(Core::System& system, u64 userdata, s64 cycles_late); static void PatchEngineCallback(Core::System& system, u64 userdata, s64 cycles_late);
@ -116,7 +115,6 @@ private:
CoreTiming::EventType* m_event_type_dsp = nullptr; CoreTiming::EventType* m_event_type_dsp = nullptr;
CoreTiming::EventType* m_event_type_ipc_hle = nullptr; CoreTiming::EventType* m_event_type_ipc_hle = nullptr;
CoreTiming::EventType* m_event_type_gpu_sleeper = nullptr; CoreTiming::EventType* m_event_type_gpu_sleeper = nullptr;
CoreTiming::EventType* m_event_type_perf_tracker = nullptr;
// PatchEngine updates every 1/60th of a second by default // PatchEngine updates every 1/60th of a second by default
CoreTiming::EventType* m_event_type_patch_engine = nullptr; CoreTiming::EventType* m_event_type_patch_engine = nullptr;
}; };

View file

@ -914,6 +914,10 @@ void VideoInterfaceManager::Update(u64 ticks)
{ {
// Throttle before SI poll so user input is taken just before needed. (lower input latency) // Throttle before SI poll so user input is taken just before needed. (lower input latency)
core_timing.Throttle(ticks); core_timing.Throttle(ticks);
// This is a nice place to measure performance so we don't have to Throttle elsewhere.
g_perf_metrics.CountPerformanceMarker(ticks, m_system.GetSystemTimers().GetTicksPerSecond());
Core::UpdateInputGate(!Config::Get(Config::MAIN_INPUT_BACKGROUND_INPUT), Core::UpdateInputGate(!Config::Get(Config::MAIN_INPUT_BACKGROUND_INPUT),
Config::Get(Config::MAIN_LOCK_CURSOR)); Config::Get(Config::MAIN_LOCK_CURSOR));
auto& si = m_system.GetSerialInterface(); auto& si = m_system.GetSerialInterface();

View file

@ -9,10 +9,6 @@
#include <implot.h> #include <implot.h>
#include "Core/Config/GraphicsSettings.h" #include "Core/Config/GraphicsSettings.h"
#include "Core/CoreTiming.h"
#include "Core/HW/SystemTimers.h"
#include "Core/HW/VideoInterface.h"
#include "Core/System.h"
#include "VideoCommon/VideoConfig.h" #include "VideoCommon/VideoConfig.h"
PerformanceMetrics g_perf_metrics; PerformanceMetrics g_perf_metrics;
@ -21,11 +17,11 @@ void PerformanceMetrics::Reset()
{ {
m_fps_counter.Reset(); m_fps_counter.Reset();
m_vps_counter.Reset(); m_vps_counter.Reset();
m_speed_counter.Reset();
m_time_sleeping = DT::zero(); m_time_sleeping = DT::zero();
m_real_times.fill(Clock::now()); m_samples = {};
m_core_ticks.fill(0);
m_speed = 0;
m_max_speed = 0; m_max_speed = 0;
} }
@ -44,23 +40,36 @@ void PerformanceMetrics::CountThrottleSleep(DT sleep)
m_time_sleeping += sleep; m_time_sleeping += sleep;
} }
void PerformanceMetrics::CountPerformanceMarker(Core::System& system, s64 cycles_late) void PerformanceMetrics::AdjustClockSpeed(s64 ticks, u32 new_ppc_clock, u32 old_ppc_clock)
{ {
m_speed_counter.Count(); for (auto& sample : m_samples)
m_speed_counter.UpdateStats(); {
const s64 diff = (sample.core_ticks - ticks) * new_ppc_clock / old_ppc_clock;
sample.core_ticks = ticks + diff;
}
}
const auto ticks = system.GetCoreTiming().GetTicks() - cycles_late; void PerformanceMetrics::CountPerformanceMarker(s64 core_ticks, u32 ticks_per_second)
const auto real_time = Clock::now() - m_time_sleeping; {
const auto clock_time = Clock::now();
const auto work_time = clock_time - m_time_sleeping;
auto& oldest_ticks = m_core_ticks[m_time_index]; m_samples.emplace_back(
auto& oldest_time = m_real_times[m_time_index]; PerfSample{.clock_time = clock_time, .work_time = work_time, .core_ticks = core_ticks});
m_max_speed = DT_s(ticks - oldest_ticks) / system.GetSystemTimers().GetTicksPerSecond() / const auto sample_window = std::chrono::microseconds{g_ActiveConfig.iPerfSampleUSec};
(real_time - oldest_time); while (clock_time - m_samples.front().clock_time > sample_window)
m_samples.pop_front();
oldest_ticks = ticks; // Avoid division by zero when we just have one sample.
oldest_time = real_time; if (m_samples.size() < 2)
++m_time_index; return;
const PerfSample& oldest = m_samples.front();
const auto elapsed_core_time = DT_s(core_ticks - oldest.core_ticks) / ticks_per_second;
m_speed.store(elapsed_core_time / (clock_time - oldest.clock_time), std::memory_order_relaxed);
m_max_speed.store(elapsed_core_time / (work_time - oldest.work_time), std::memory_order_relaxed);
} }
double PerformanceMetrics::GetFPS() const double PerformanceMetrics::GetFPS() const
@ -75,12 +84,12 @@ double PerformanceMetrics::GetVPS() const
double PerformanceMetrics::GetSpeed() const double PerformanceMetrics::GetSpeed() const
{ {
return m_speed_counter.GetHzAvg() / 100.0; return m_speed.load(std::memory_order_relaxed);
} }
double PerformanceMetrics::GetMaxSpeed() const double PerformanceMetrics::GetMaxSpeed() const
{ {
return m_max_speed; return m_max_speed.load(std::memory_order_relaxed);
} }
void PerformanceMetrics::DrawImGuiStats(const float backbuffer_scale) void PerformanceMetrics::DrawImGuiStats(const float backbuffer_scale)

View file

@ -3,8 +3,8 @@
#pragma once #pragma once
#include <array>
#include <atomic> #include <atomic>
#include <deque>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "VideoCommon/PerformanceTracker.h" #include "VideoCommon/PerformanceTracker.h"
@ -25,15 +25,17 @@ public:
PerformanceMetrics(PerformanceMetrics&&) = delete; PerformanceMetrics(PerformanceMetrics&&) = delete;
PerformanceMetrics& operator=(PerformanceMetrics&&) = delete; PerformanceMetrics& operator=(PerformanceMetrics&&) = delete;
// Count Functions
void Reset(); void Reset();
void CountFrame(); void CountFrame();
void CountVBlank(); void CountVBlank();
// Call from CPU thread.
void CountThrottleSleep(DT sleep); void CountThrottleSleep(DT sleep);
void CountPerformanceMarker(Core::System& system, s64 cyclesLate); void AdjustClockSpeed(s64 ticks, u32 new_ppc_clock, u32 old_ppc_clock);
void CountPerformanceMarker(s64 ticks, u32 ticks_per_second);
// Getter Functions // Getter Functions. May be called from any thread.
double GetFPS() const; double GetFPS() const;
double GetVPS() const; double GetVPS() const;
double GetSpeed() const; double GetSpeed() const;
@ -45,14 +47,20 @@ public:
private: private:
PerformanceTracker m_fps_counter{"render_times.txt"}; PerformanceTracker m_fps_counter{"render_times.txt"};
PerformanceTracker m_vps_counter{"vblank_times.txt"}; PerformanceTracker m_vps_counter{"vblank_times.txt"};
PerformanceTracker m_speed_counter{std::nullopt, std::chrono::seconds{1}};
double m_graph_max_time = 0.0; double m_graph_max_time = 0.0;
std::atomic<double> m_speed{};
std::atomic<double> m_max_speed{}; std::atomic<double> m_max_speed{};
u8 m_time_index = 0;
std::array<TimePoint, 256> m_real_times{}; struct PerfSample
std::array<u64, 256> m_core_ticks{}; {
TimePoint clock_time;
TimePoint work_time;
s64 core_ticks;
};
std::deque<PerfSample> m_samples;
DT m_time_sleeping{}; DT m_time_sleeping{};
}; };