diff --git a/Source/Core/Core/CoreTiming.cpp b/Source/Core/Core/CoreTiming.cpp index 067587ede6..82d0db9f7c 100644 --- a/Source/Core/Core/CoreTiming.cpp +++ b/Source/Core/Core/CoreTiming.cpp @@ -463,6 +463,8 @@ void CoreTimingManager::LogPendingEvents() const // Should only be called from the CPU thread after the PPC clock has changed void CoreTimingManager::AdjustEventQueueTimes(u32 new_ppc_clock, u32 old_ppc_clock) { + g_perf_metrics.AdjustClockSpeed(m_globals.global_timer, new_ppc_clock, old_ppc_clock); + m_throttle_clock_per_sec = new_ppc_clock; for (Event& ev : m_event_queue) diff --git a/Source/Core/Core/HW/SystemTimers.cpp b/Source/Core/Core/HW/SystemTimers.cpp index 508b445288..c318ca08d4 100644 --- a/Source/Core/Core/HW/SystemTimers.cpp +++ b/Source/Core/Core/HW/SystemTimers.cpp @@ -50,8 +50,6 @@ IPC_HLE_PERIOD: For the Wii Remote this is the call schedule: #include "AudioCommon/Mixer.h" #include "Common/CommonTypes.h" -#include "Common/Logging/Log.h" -#include "Common/Thread.h" #include "Common/Timer.h" #include "Core/Config/MainSettings.h" #include "Core/Core.h" @@ -122,21 +120,6 @@ void SystemTimersManager::GPUSleepCallback(Core::System& system, u64 userdata, s system_timers.m_event_type_gpu_sleeper); } -void SystemTimersManager::PerfTrackerCallback(Core::System& system, u64 userdata, s64 cycles_late) -{ - auto& core_timing = system.GetCoreTiming(); - // Throttle for accurate performance metrics. - core_timing.Throttle(core_timing.GetTicks() - cycles_late); - g_perf_metrics.CountPerformanceMarker(system, cycles_late); - - // Call this performance tracker again in 1/100th of a second. - // The tracker stores 256 values so this will let us summarize the last 2.56 seconds. - // The performance metrics require this to be called at 100hz for the speed% is correct. - auto& system_timers = system.GetSystemTimers(); - core_timing.ScheduleEvent(system_timers.GetTicksPerSecond() / 100 - cycles_late, - system_timers.m_event_type_perf_tracker); -} - void SystemTimersManager::VICallback(Core::System& system, u64 userdata, s64 cycles_late) { auto& core_timing = system.GetCoreTiming(); @@ -293,10 +276,8 @@ void SystemTimersManager::Init() m_event_type_ipc_hle = core_timing.RegisterEvent("IPC_HLE_UpdateCallback", IPC_HLE_UpdateCallback); m_event_type_gpu_sleeper = core_timing.RegisterEvent("GPUSleeper", GPUSleepCallback); - m_event_type_perf_tracker = core_timing.RegisterEvent("PerfTracker", PerfTrackerCallback); m_event_type_patch_engine = core_timing.RegisterEvent("PatchEngine", PatchEngineCallback); - core_timing.ScheduleEvent(0, m_event_type_perf_tracker); core_timing.ScheduleEvent(0, m_event_type_gpu_sleeper); core_timing.ScheduleEvent(vi.GetTicksPerHalfLine(), m_event_type_vi); core_timing.ScheduleEvent(0, m_event_type_dsp); diff --git a/Source/Core/Core/HW/SystemTimers.h b/Source/Core/Core/HW/SystemTimers.h index 8d5f051692..b0002c49ee 100644 --- a/Source/Core/Core/HW/SystemTimers.h +++ b/Source/Core/Core/HW/SystemTimers.h @@ -94,7 +94,6 @@ private: static void AudioDMACallback(Core::System& system, u64 userdata, s64 cycles_late); static void IPC_HLE_UpdateCallback(Core::System& system, u64 userdata, s64 cycles_late); static void GPUSleepCallback(Core::System& system, u64 userdata, s64 cycles_late); - static void PerfTrackerCallback(Core::System& system, u64 userdata, s64 cycles_late); static void VICallback(Core::System& system, u64 userdata, s64 cycles_late); static void DecrementerCallback(Core::System& system, u64 userdata, s64 cycles_late); static void PatchEngineCallback(Core::System& system, u64 userdata, s64 cycles_late); @@ -116,7 +115,6 @@ private: CoreTiming::EventType* m_event_type_dsp = nullptr; CoreTiming::EventType* m_event_type_ipc_hle = nullptr; CoreTiming::EventType* m_event_type_gpu_sleeper = nullptr; - CoreTiming::EventType* m_event_type_perf_tracker = nullptr; // PatchEngine updates every 1/60th of a second by default CoreTiming::EventType* m_event_type_patch_engine = nullptr; }; diff --git a/Source/Core/Core/HW/VideoInterface.cpp b/Source/Core/Core/HW/VideoInterface.cpp index 21346534ec..b03a8c5988 100644 --- a/Source/Core/Core/HW/VideoInterface.cpp +++ b/Source/Core/Core/HW/VideoInterface.cpp @@ -853,10 +853,14 @@ void VideoInterfaceManager::EndField(FieldType field, u64 ticks) if (!Config::Get(Config::GFX_HACK_EARLY_XFB_OUTPUT)) OutputField(field, ticks); - // Note: We really only need to Throttle prior to to presentation, - // but it is needed here if we want accurate "VBlank" statistics, - // when using GPU-on-Thread or Early/Immediate XFB. - m_system.GetCoreTiming().Throttle(ticks); + // Note: OutputField above doesn't present when using GPU-on-Thread or Early/Immediate XFB, + // giving "VBlank" measurements here poor pacing without a Throttle call. + // If the user actually wants the data, we'll Throttle to make the numbers nice. + const bool is_vblank_data_wanted = g_ActiveConfig.bShowVPS || g_ActiveConfig.bShowVTimes || + g_ActiveConfig.bLogRenderTimeToFile || + g_ActiveConfig.bShowGraphs; + if (is_vblank_data_wanted) + m_system.GetCoreTiming().Throttle(ticks); g_perf_metrics.CountVBlank(); VIEndFieldEvent::Trigger(); @@ -914,6 +918,10 @@ void VideoInterfaceManager::Update(u64 ticks) { // Throttle before SI poll so user input is taken just before needed. (lower input latency) core_timing.Throttle(ticks); + + // This is a nice place to measure performance so we don't have to Throttle elsewhere. + g_perf_metrics.CountPerformanceMarker(ticks, m_system.GetSystemTimers().GetTicksPerSecond()); + Core::UpdateInputGate(!Config::Get(Config::MAIN_INPUT_BACKGROUND_INPUT), Config::Get(Config::MAIN_LOCK_CURSOR)); auto& si = m_system.GetSerialInterface(); diff --git a/Source/Core/VideoCommon/PerformanceMetrics.cpp b/Source/Core/VideoCommon/PerformanceMetrics.cpp index db61ea8978..321e110996 100644 --- a/Source/Core/VideoCommon/PerformanceMetrics.cpp +++ b/Source/Core/VideoCommon/PerformanceMetrics.cpp @@ -9,10 +9,6 @@ #include #include "Core/Config/GraphicsSettings.h" -#include "Core/CoreTiming.h" -#include "Core/HW/SystemTimers.h" -#include "Core/HW/VideoInterface.h" -#include "Core/System.h" #include "VideoCommon/VideoConfig.h" PerformanceMetrics g_perf_metrics; @@ -21,11 +17,11 @@ void PerformanceMetrics::Reset() { m_fps_counter.Reset(); m_vps_counter.Reset(); - m_speed_counter.Reset(); m_time_sleeping = DT::zero(); - m_real_times.fill(Clock::now()); - m_core_ticks.fill(0); + m_samples = {}; + + m_speed = 0; m_max_speed = 0; } @@ -44,23 +40,36 @@ void PerformanceMetrics::CountThrottleSleep(DT sleep) m_time_sleeping += sleep; } -void PerformanceMetrics::CountPerformanceMarker(Core::System& system, s64 cycles_late) +void PerformanceMetrics::AdjustClockSpeed(s64 ticks, u32 new_ppc_clock, u32 old_ppc_clock) { - m_speed_counter.Count(); - m_speed_counter.UpdateStats(); + for (auto& sample : m_samples) + { + const s64 diff = (sample.core_ticks - ticks) * new_ppc_clock / old_ppc_clock; + sample.core_ticks = ticks + diff; + } +} - const auto ticks = system.GetCoreTiming().GetTicks() - cycles_late; - const auto real_time = Clock::now() - m_time_sleeping; +void PerformanceMetrics::CountPerformanceMarker(s64 core_ticks, u32 ticks_per_second) +{ + const auto clock_time = Clock::now(); + const auto work_time = clock_time - m_time_sleeping; - auto& oldest_ticks = m_core_ticks[m_time_index]; - auto& oldest_time = m_real_times[m_time_index]; + m_samples.emplace_back( + PerfSample{.clock_time = clock_time, .work_time = work_time, .core_ticks = core_ticks}); - m_max_speed = DT_s(ticks - oldest_ticks) / system.GetSystemTimers().GetTicksPerSecond() / - (real_time - oldest_time); + const auto sample_window = std::chrono::microseconds{g_ActiveConfig.iPerfSampleUSec}; + while (clock_time - m_samples.front().clock_time > sample_window) + m_samples.pop_front(); - oldest_ticks = ticks; - oldest_time = real_time; - ++m_time_index; + // Avoid division by zero when we just have one sample. + if (m_samples.size() < 2) + return; + + const PerfSample& oldest = m_samples.front(); + const auto elapsed_core_time = DT_s(core_ticks - oldest.core_ticks) / ticks_per_second; + + m_speed.store(elapsed_core_time / (clock_time - oldest.clock_time), std::memory_order_relaxed); + m_max_speed.store(elapsed_core_time / (work_time - oldest.work_time), std::memory_order_relaxed); } double PerformanceMetrics::GetFPS() const @@ -75,12 +84,12 @@ double PerformanceMetrics::GetVPS() const double PerformanceMetrics::GetSpeed() const { - return m_speed_counter.GetHzAvg() / 100.0; + return m_speed.load(std::memory_order_relaxed); } double PerformanceMetrics::GetMaxSpeed() const { - return m_max_speed; + return m_max_speed.load(std::memory_order_relaxed); } void PerformanceMetrics::DrawImGuiStats(const float backbuffer_scale) diff --git a/Source/Core/VideoCommon/PerformanceMetrics.h b/Source/Core/VideoCommon/PerformanceMetrics.h index ca035b108e..7beac394dc 100644 --- a/Source/Core/VideoCommon/PerformanceMetrics.h +++ b/Source/Core/VideoCommon/PerformanceMetrics.h @@ -3,8 +3,8 @@ #pragma once -#include #include +#include #include "Common/CommonTypes.h" #include "VideoCommon/PerformanceTracker.h" @@ -25,15 +25,17 @@ public: PerformanceMetrics(PerformanceMetrics&&) = delete; PerformanceMetrics& operator=(PerformanceMetrics&&) = delete; - // Count Functions void Reset(); + void CountFrame(); void CountVBlank(); + // Call from CPU thread. void CountThrottleSleep(DT sleep); - void CountPerformanceMarker(Core::System& system, s64 cyclesLate); + void AdjustClockSpeed(s64 ticks, u32 new_ppc_clock, u32 old_ppc_clock); + void CountPerformanceMarker(s64 ticks, u32 ticks_per_second); - // Getter Functions + // Getter Functions. May be called from any thread. double GetFPS() const; double GetVPS() const; double GetSpeed() const; @@ -45,14 +47,20 @@ public: private: PerformanceTracker m_fps_counter{"render_times.txt"}; PerformanceTracker m_vps_counter{"vblank_times.txt"}; - PerformanceTracker m_speed_counter{std::nullopt, std::chrono::seconds{1}}; double m_graph_max_time = 0.0; + std::atomic m_speed{}; std::atomic m_max_speed{}; - u8 m_time_index = 0; - std::array m_real_times{}; - std::array m_core_ticks{}; + + struct PerfSample + { + TimePoint clock_time; + TimePoint work_time; + s64 core_ticks; + }; + + std::deque m_samples; DT m_time_sleeping{}; };