From b2ce3fbefc34fac9449e5f88f15c7a4da0825fac Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Wed, 5 Mar 2025 01:36:18 -0600 Subject: [PATCH 1/3] PerformanceTracker: Pass chrono values instead of us s64. --- Source/Core/VideoCommon/PerformanceMetrics.h | 2 +- Source/Core/VideoCommon/PerformanceTracker.cpp | 9 ++++----- Source/Core/VideoCommon/PerformanceTracker.h | 5 ++--- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/Source/Core/VideoCommon/PerformanceMetrics.h b/Source/Core/VideoCommon/PerformanceMetrics.h index d1f4211a46..672c07037d 100644 --- a/Source/Core/VideoCommon/PerformanceMetrics.h +++ b/Source/Core/VideoCommon/PerformanceMetrics.h @@ -47,7 +47,7 @@ public: private: PerformanceTracker m_fps_counter{"render_times.txt"}; PerformanceTracker m_vps_counter{"vblank_times.txt"}; - PerformanceTracker m_speed_counter{std::nullopt, 1000000}; + PerformanceTracker m_speed_counter{std::nullopt, std::chrono::seconds{1}}; double m_graph_max_time = 0.0; diff --git a/Source/Core/VideoCommon/PerformanceTracker.cpp b/Source/Core/VideoCommon/PerformanceTracker.cpp index 93b33cff9a..64729a5cad 100644 --- a/Source/Core/VideoCommon/PerformanceTracker.cpp +++ b/Source/Core/VideoCommon/PerformanceTracker.cpp @@ -12,21 +12,20 @@ #include "Common/CommonTypes.h" #include "Common/FileUtil.h" -#include "Common/Timer.h" #include "Core/Core.h" #include "VideoCommon/VideoConfig.h" static constexpr double SAMPLE_RC_RATIO = 0.25; PerformanceTracker::PerformanceTracker(const std::optional log_name, - const std::optional sample_window_us) + const std::optional
sample_window_duration) : m_on_state_changed_handle{Core::AddOnStateChangedCallback([this](Core::State state) { if (state == Core::State::Paused) SetPaused(true); else if (state == Core::State::Running) SetPaused(false); })}, - m_log_name{log_name}, m_sample_window_us{sample_window_us} + m_log_name{log_name}, m_sample_window_duration{sample_window_duration} { Reset(); } @@ -92,8 +91,8 @@ void PerformanceTracker::Count() DT PerformanceTracker::GetSampleWindow() const { // This reads a constant value and thus does not need a mutex - return std::chrono::duration_cast
( - DT_us(m_sample_window_us.value_or(std::max(1, g_ActiveConfig.iPerfSampleUSec)))); + return m_sample_window_duration.value_or( + duration_cast
(DT_us{std::max(1, g_ActiveConfig.iPerfSampleUSec)})); } double PerformanceTracker::GetHzAvg() const diff --git a/Source/Core/VideoCommon/PerformanceTracker.h b/Source/Core/VideoCommon/PerformanceTracker.h index f24601c845..a41271abaa 100644 --- a/Source/Core/VideoCommon/PerformanceTracker.h +++ b/Source/Core/VideoCommon/PerformanceTracker.h @@ -4,7 +4,6 @@ #pragma once #include -#include #include #include #include @@ -35,7 +34,7 @@ private: public: PerformanceTracker(const std::optional log_name = std::nullopt, - const std::optional sample_window_us = std::nullopt); + const std::optional
sample_window_duration = std::nullopt); ~PerformanceTracker(); PerformanceTracker(const PerformanceTracker&) = delete; @@ -84,7 +83,7 @@ private: // Functions for managing dt queue TimePoint m_last_time; // Amount of time to sample dt's over (defaults to config) - const std::optional m_sample_window_us; + const std::optional
m_sample_window_duration; // Queue + Running Total used to calculate average dt DT m_dt_total = DT::zero(); From c763961112f858732c1cad58e286d7a076ef7e5f Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Wed, 5 Mar 2025 02:06:05 -0600 Subject: [PATCH 2/3] PerformanceTracker: Use std::deque instead of hand-rolled circular queue. --- .../Core/VideoCommon/PerformanceTracker.cpp | 82 ++++++------------- Source/Core/VideoCommon/PerformanceTracker.h | 41 ++-------- 2 files changed, 33 insertions(+), 90 deletions(-) diff --git a/Source/Core/VideoCommon/PerformanceTracker.cpp b/Source/Core/VideoCommon/PerformanceTracker.cpp index 64729a5cad..302adc6a57 100644 --- a/Source/Core/VideoCommon/PerformanceTracker.cpp +++ b/Source/Core/VideoCommon/PerformanceTracker.cpp @@ -16,6 +16,8 @@ #include "VideoCommon/VideoConfig.h" static constexpr double SAMPLE_RC_RATIO = 0.25; +static constexpr u64 MAX_DT_QUEUE_SIZE = 1UL << 12; +static constexpr u64 MAX_QUALITY_GRAPH_SIZE = 1UL << 8; PerformanceTracker::PerformanceTracker(const std::optional log_name, const std::optional
sample_window_duration) @@ -39,7 +41,8 @@ void PerformanceTracker::Reset() { std::unique_lock lock{m_mutex}; - QueueClear(); + m_dt_total = DT::zero(); + m_dt_queue.clear(); m_last_time = Clock::now(); m_hz_avg = 0.0; m_dt_avg = DT::zero(); @@ -60,17 +63,16 @@ void PerformanceTracker::Count() m_last_time = time; - QueuePush(diff); - m_dt_total += diff; + PushFront(diff); - if (m_dt_queue_begin == m_dt_queue_end) - m_dt_total -= QueuePop(); + if (m_dt_queue.size() == MAX_DT_QUEUE_SIZE) + PopBack(); - while (window <= m_dt_total - QueueTop()) - m_dt_total -= QueuePop(); + while (m_dt_total - m_dt_queue.back() >= window) + PopBack(); // Simple Moving Average Throughout the Window - m_dt_avg = m_dt_total / QueueSize(); + m_dt_avg = m_dt_total / m_dt_queue.size(); const double hz = DT_s(1.0) / m_dt_avg; // Exponential Moving Average @@ -114,28 +116,28 @@ DT PerformanceTracker::GetDtStd() const if (m_dt_std) return *m_dt_std; - if (QueueEmpty()) + if (m_dt_queue.empty()) return *(m_dt_std = DT::zero()); double total = 0.0; - for (std::size_t i = m_dt_queue_begin; i != m_dt_queue_end; i = IncrementIndex(i)) + for (auto dt : m_dt_queue) { - double diff = DT_s(m_dt_queue[i] - m_dt_avg).count(); + double diff = DT_s(dt - m_dt_avg).count(); total += diff * diff; } // This is a weighted standard deviation - return *(m_dt_std = std::chrono::duration_cast
(DT_s(std::sqrt(total / QueueSize())))); + return *(m_dt_std = std::chrono::duration_cast
(DT_s(std::sqrt(total / m_dt_queue.size())))); } DT PerformanceTracker::GetLastRawDt() const { std::shared_lock lock{m_mutex}; - if (QueueEmpty()) + if (m_dt_queue.empty()) return DT::zero(); - return QueueBottom(); + return m_dt_queue.front(); } void PerformanceTracker::ImPlotPlotLines(const char* label) const @@ -144,14 +146,14 @@ void PerformanceTracker::ImPlotPlotLines(const char* label) const std::shared_lock lock{m_mutex}; - if (QueueEmpty()) + if (m_dt_queue.empty()) return; // Decides if there are too many points to plot using rectangles - const bool quality = QueueSize() < MAX_QUALITY_GRAPH_SIZE; + const bool quality = m_dt_queue.size() < MAX_QUALITY_GRAPH_SIZE; const DT update_time = Clock::now() - m_last_time; - const float predicted_frame_time = DT_ms(std::max(update_time, QueueBottom())).count(); + const float predicted_frame_time = DT_ms(std::max(update_time, m_dt_queue.front())).count(); std::size_t points = 0; if (quality) @@ -165,11 +167,9 @@ void PerformanceTracker::ImPlotPlotLines(const char* label) const y[points] = predicted_frame_time; ++points; - const std::size_t begin = DecrementIndex(m_dt_queue_end); - const std::size_t end = DecrementIndex(m_dt_queue_begin); - for (std::size_t i = begin; i != end; i = DecrementIndex(i)) + for (auto dt : m_dt_queue) { - const float frame_time_ms = DT_ms(m_dt_queue[i]).count(); + const float frame_time_ms = DT_ms(dt).count(); if (quality) { @@ -186,44 +186,16 @@ void PerformanceTracker::ImPlotPlotLines(const char* label) const ImPlot::PlotLine(label, x.data(), y.data(), static_cast(points)); } -void PerformanceTracker::QueueClear() +void PerformanceTracker::PushFront(DT value) { - m_dt_total = DT::zero(); - m_dt_queue_begin = 0; - m_dt_queue_end = 0; + m_dt_queue.push_front(value); + m_dt_total += value; } -void PerformanceTracker::QueuePush(DT dt) +void PerformanceTracker::PopBack() { - m_dt_queue[m_dt_queue_end] = dt; - m_dt_queue_end = IncrementIndex(m_dt_queue_end); -} - -const DT& PerformanceTracker::QueuePop() -{ - const std::size_t top = m_dt_queue_begin; - m_dt_queue_begin = IncrementIndex(m_dt_queue_begin); - return m_dt_queue[top]; -} - -const DT& PerformanceTracker::QueueTop() const -{ - return m_dt_queue[m_dt_queue_begin]; -} - -const DT& PerformanceTracker::QueueBottom() const -{ - return m_dt_queue[DecrementIndex(m_dt_queue_end)]; -} - -std::size_t PerformanceTracker::QueueSize() const -{ - return GetDifference(m_dt_queue_begin, m_dt_queue_end); -} - -bool PerformanceTracker::QueueEmpty() const -{ - return m_dt_queue_begin == m_dt_queue_end; + m_dt_total -= m_dt_queue.back(); + m_dt_queue.pop_back(); } void PerformanceTracker::LogRenderTimeToFile(DT val) diff --git a/Source/Core/VideoCommon/PerformanceTracker.h b/Source/Core/VideoCommon/PerformanceTracker.h index a41271abaa..8f39994d18 100644 --- a/Source/Core/VideoCommon/PerformanceTracker.h +++ b/Source/Core/VideoCommon/PerformanceTracker.h @@ -3,7 +3,7 @@ #pragma once -#include +#include #include #include #include @@ -12,26 +12,6 @@ class PerformanceTracker { -private: - // Must be powers of 2 for masking to work - static constexpr u64 MAX_DT_QUEUE_SIZE = 1UL << 12; - static constexpr u64 MAX_QUALITY_GRAPH_SIZE = 1UL << 8; - - static inline std::size_t IncrementIndex(const std::size_t index) - { - return (index + 1) & (MAX_DT_QUEUE_SIZE - 1); - } - - static inline std::size_t DecrementIndex(const std::size_t index) - { - return (index - 1) & (MAX_DT_QUEUE_SIZE - 1); - } - - static inline std::size_t GetDifference(const std::size_t begin, const std::size_t end) - { - return (end - begin) & (MAX_DT_QUEUE_SIZE - 1); - } - public: PerformanceTracker(const std::optional log_name = std::nullopt, const std::optional
sample_window_duration = std::nullopt); @@ -58,20 +38,13 @@ public: void ImPlotPlotLines(const char* label) const; -private: // Functions for managing dt queue - inline void QueueClear(); - inline void QueuePush(DT dt); - inline const DT& QueuePop(); - inline const DT& QueueTop() const; - inline const DT& QueueBottom() const; - - std::size_t inline QueueSize() const; - bool inline QueueEmpty() const; - - // Handle pausing and logging +private: void LogRenderTimeToFile(DT val); void SetPaused(bool paused); + void PushFront(DT value); + void PopBack(); + bool m_paused = false; int m_on_state_changed_handle; @@ -87,9 +60,7 @@ private: // Functions for managing dt queue // Queue + Running Total used to calculate average dt DT m_dt_total = DT::zero(); - std::array m_dt_queue; - std::size_t m_dt_queue_begin = 0; - std::size_t m_dt_queue_end = 0; + std::deque
m_dt_queue; // Average rate/time throughout the window DT m_dt_avg = DT::zero(); // Uses Moving Average From 46e0952e97bae2cfdecaeed88f7768d028718918 Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Wed, 5 Mar 2025 03:26:34 -0600 Subject: [PATCH 3/3] PerformanceTracker: Use SPSCQueue and atomic to eliminate need for a mutex. Clean up some math. --- .../Core/VideoCommon/PerformanceMetrics.cpp | 4 + .../Core/VideoCommon/PerformanceTracker.cpp | 164 ++++++++---------- Source/Core/VideoCommon/PerformanceTracker.h | 38 ++-- 3 files changed, 93 insertions(+), 113 deletions(-) diff --git a/Source/Core/VideoCommon/PerformanceMetrics.cpp b/Source/Core/VideoCommon/PerformanceMetrics.cpp index 773dc44383..2a4928cd7e 100644 --- a/Source/Core/VideoCommon/PerformanceMetrics.cpp +++ b/Source/Core/VideoCommon/PerformanceMetrics.cpp @@ -48,6 +48,7 @@ void PerformanceMetrics::CountPerformanceMarker(Core::System& system, s64 cycles { std::unique_lock lock(m_time_lock); m_speed_counter.Count(); + m_speed_counter.UpdateStats(); m_real_times[m_time_index] = Clock::now() - m_time_sleeping; m_cpu_times[m_time_index] = system.GetCoreTiming().GetCPUTimePoint(cyclesLate); @@ -84,6 +85,9 @@ double PerformanceMetrics::GetLastSpeedDenominator() const void PerformanceMetrics::DrawImGuiStats(const float backbuffer_scale) { + m_vps_counter.UpdateStats(); + m_fps_counter.UpdateStats(); + const int movable_flag = Config::Get(Config::GFX_MOVABLE_PERFORMANCE_METRICS) ? ImGuiWindowFlags_None : ImGuiWindowFlags_NoMove; diff --git a/Source/Core/VideoCommon/PerformanceTracker.cpp b/Source/Core/VideoCommon/PerformanceTracker.cpp index 302adc6a57..f61f8fe63e 100644 --- a/Source/Core/VideoCommon/PerformanceTracker.cpp +++ b/Source/Core/VideoCommon/PerformanceTracker.cpp @@ -6,12 +6,12 @@ #include #include #include -#include #include #include "Common/CommonTypes.h" #include "Common/FileUtil.h" +#include "Common/MathUtil.h" #include "Core/Core.h" #include "VideoCommon/VideoConfig.h" @@ -21,14 +21,11 @@ static constexpr u64 MAX_QUALITY_GRAPH_SIZE = 1UL << 8; PerformanceTracker::PerformanceTracker(const std::optional log_name, const std::optional
sample_window_duration) - : m_on_state_changed_handle{Core::AddOnStateChangedCallback([this](Core::State state) { - if (state == Core::State::Paused) - SetPaused(true); - else if (state == Core::State::Running) - SetPaused(false); - })}, - m_log_name{log_name}, m_sample_window_duration{sample_window_duration} + : m_log_name{log_name}, m_sample_window_duration{sample_window_duration} { + m_on_state_changed_handle = + Core::AddOnStateChangedCallback([this](Core::State state) { m_is_last_time_sane = false; }); + Reset(); } @@ -39,112 +36,110 @@ PerformanceTracker::~PerformanceTracker() void PerformanceTracker::Reset() { - std::unique_lock lock{m_mutex}; + m_raw_dts.Clear(); + m_dt_queue.clear(); m_dt_total = DT::zero(); - m_dt_queue.clear(); + m_last_raw_dt = DT::zero(); m_last_time = Clock::now(); m_hz_avg = 0.0; m_dt_avg = DT::zero(); - m_dt_std = std::nullopt; + m_dt_std = DT::zero(); + m_is_last_time_sane = false; } void PerformanceTracker::Count() { - std::unique_lock lock{m_mutex}; + const TimePoint current_time{Clock::now()}; - if (m_paused) + const DT diff{current_time - m_last_time}; + m_last_time = current_time; + + if (!m_is_last_time_sane) + { + m_is_last_time_sane = true; return; + } - const DT window{GetSampleWindow()}; + m_last_raw_dt = diff; + m_raw_dts.Push(diff); +} - const TimePoint time{Clock::now()}; - const DT diff{time - m_last_time}; +void PerformanceTracker::UpdateStats() +{ + DT diff{}; + while (m_raw_dts.Pop(diff)) + HandleRawDt(diff); - m_last_time = time; + // Update Std Dev + MathUtil::RunningVariance variance; + for (auto dt : m_dt_queue) + variance.Push(DT_s(dt).count()); + m_dt_std = std::chrono::duration_cast
(DT_s(variance.PopulationStandardDeviation())); +} + +void PerformanceTracker::HandleRawDt(DT diff) +{ + if (m_dt_queue.size() == MAX_DT_QUEUE_SIZE) + PopBack(); PushFront(diff); - if (m_dt_queue.size() == MAX_DT_QUEUE_SIZE) - PopBack(); + const DT window{GetSampleWindow()}; while (m_dt_total - m_dt_queue.back() >= window) PopBack(); // Simple Moving Average Throughout the Window - m_dt_avg = m_dt_total / m_dt_queue.size(); - const double hz = DT_s(1.0) / m_dt_avg; + const DT dt_avg = m_dt_total / m_dt_queue.size(); + const double hz = DT_s(1.0) / dt_avg; + m_dt_avg = dt_avg; // Exponential Moving Average const DT_s rc = SAMPLE_RC_RATIO * std::min(window, m_dt_total); const double a = 1.0 - std::exp(-(DT_s(diff) / rc)); // Sometimes euler averages can break when the average is inf/nan - if (std::isfinite(m_hz_avg)) - m_hz_avg += a * (hz - m_hz_avg); + const auto hz_avg = m_hz_avg.load(); + if (std::isfinite(hz_avg)) + m_hz_avg = hz_avg + a * (hz - hz_avg); else m_hz_avg = hz; - m_dt_std = std::nullopt; - LogRenderTimeToFile(diff); } DT PerformanceTracker::GetSampleWindow() const { - // This reads a constant value and thus does not need a mutex return m_sample_window_duration.value_or( duration_cast
(DT_us{std::max(1, g_ActiveConfig.iPerfSampleUSec)})); } double PerformanceTracker::GetHzAvg() const { - std::shared_lock lock{m_mutex}; return m_hz_avg; } DT PerformanceTracker::GetDtAvg() const { - std::shared_lock lock{m_mutex}; return m_dt_avg; } DT PerformanceTracker::GetDtStd() const { - std::unique_lock lock{m_mutex}; - - if (m_dt_std) - return *m_dt_std; - - if (m_dt_queue.empty()) - return *(m_dt_std = DT::zero()); - - double total = 0.0; - for (auto dt : m_dt_queue) - { - double diff = DT_s(dt - m_dt_avg).count(); - total += diff * diff; - } - - // This is a weighted standard deviation - return *(m_dt_std = std::chrono::duration_cast
(DT_s(std::sqrt(total / m_dt_queue.size())))); + return m_dt_std; } DT PerformanceTracker::GetLastRawDt() const { - std::shared_lock lock{m_mutex}; - - if (m_dt_queue.empty()) - return DT::zero(); - - return m_dt_queue.front(); + return m_last_raw_dt; } void PerformanceTracker::ImPlotPlotLines(const char* label) const { - static std::array x, y; - - std::shared_lock lock{m_mutex}; + // "quality" graph uses twice as many points. + static_assert(MAX_QUALITY_GRAPH_SIZE * 2 <= MAX_DT_QUEUE_SIZE); + static std::array x, y; if (m_dt_queue.empty()) return; @@ -152,38 +147,32 @@ void PerformanceTracker::ImPlotPlotLines(const char* label) const // Decides if there are too many points to plot using rectangles const bool quality = m_dt_queue.size() < MAX_QUALITY_GRAPH_SIZE; - const DT update_time = Clock::now() - m_last_time; - const float predicted_frame_time = DT_ms(std::max(update_time, m_dt_queue.front())).count(); - - std::size_t points = 0; - if (quality) - { - x[points] = 0.f; - y[points] = predicted_frame_time; - ++points; - } - - x[points] = DT_ms(update_time).count(); - y[points] = predicted_frame_time; - ++points; - - for (auto dt : m_dt_queue) - { - const float frame_time_ms = DT_ms(dt).count(); + std::size_t point_index = 0; + const auto add_point = [&](DT dt, DT shift_x, float prev_ms) { + const float ms = DT_ms{dt}.count(); if (quality) { - x[points] = x[points - 1]; - y[points] = frame_time_ms; - ++points; + x[point_index] = prev_ms; + y[point_index] = ms; + ++point_index; } - x[points] = x[points - 1] + frame_time_ms; - y[points] = frame_time_ms; - ++points; - } + x[point_index] = prev_ms + DT_ms{shift_x}.count(); + y[point_index] = ms; + ++point_index; + }; - ImPlot::PlotLine(label, x.data(), y.data(), static_cast(points)); + // Rightmost point. + const auto update_time = Clock::now() - m_last_time; + const auto predicted_frame_time = std::max(update_time, m_dt_queue.front()); + add_point(predicted_frame_time, DT{}, 0); + + // Other points, right to left. + for (auto dt : m_dt_queue) + add_point(dt, dt, x[point_index - 1]); + + ImPlot::PlotLine(label, x.data(), y.data(), static_cast(point_index)); } void PerformanceTracker::PushFront(DT value) @@ -211,18 +200,3 @@ void PerformanceTracker::LogRenderTimeToFile(DT val) m_bench_file << std::fixed << std::setprecision(8) << DT_ms(val).count() << std::endl; } - -void PerformanceTracker::SetPaused(bool paused) -{ - std::unique_lock lock{m_mutex}; - - m_paused = paused; - if (m_paused) - { - m_last_time = TimePoint::max(); - } - else - { - m_last_time = Clock::now(); - } -} diff --git a/Source/Core/VideoCommon/PerformanceTracker.h b/Source/Core/VideoCommon/PerformanceTracker.h index 8f39994d18..e45b5aab4e 100644 --- a/Source/Core/VideoCommon/PerformanceTracker.h +++ b/Source/Core/VideoCommon/PerformanceTracker.h @@ -3,12 +3,13 @@ #pragma once +#include #include #include #include -#include #include "Common/CommonTypes.h" +#include "Common/SPSCQueue.h" class PerformanceTracker { @@ -22,30 +23,30 @@ public: PerformanceTracker(PerformanceTracker&&) = delete; PerformanceTracker& operator=(PerformanceTracker&&) = delete; - // Functions for recording performance information void Reset(); + + // Calls must come from the same thread. + // UpdateStats is expected to be called regularly to empty the SPSC queue. + void UpdateStats(); + void ImPlotPlotLines(const char* label) const; + + // May call from any thread, but not concurrently, not that you'd want to.. void Count(); - // Functions for reading performance information + // May call from any thread. DT GetSampleWindow() const; - double GetHzAvg() const; - DT GetDtAvg() const; DT GetDtStd() const; - DT GetLastRawDt() const; - void ImPlotPlotLines(const char* label) const; - private: void LogRenderTimeToFile(DT val); - void SetPaused(bool paused); + void HandleRawDt(DT value); void PushFront(DT value); void PopBack(); - bool m_paused = false; int m_on_state_changed_handle; // Name of log file and file stream @@ -54,6 +55,12 @@ private: // Last time Count() was called TimePoint m_last_time; + std::atomic m_is_last_time_sane = false; + + // Push'd from Count() + // and Pop'd from UpdateStats() + Common::SPSCQueue m_raw_dts; + std::atomic
m_last_raw_dt = DT::zero(); // Amount of time to sample dt's over (defaults to config) const std::optional
m_sample_window_duration; @@ -63,12 +70,7 @@ private: std::deque
m_dt_queue; // Average rate/time throughout the window - DT m_dt_avg = DT::zero(); // Uses Moving Average - double m_hz_avg = 0.0; // Uses Moving Average + Euler Average - - // Used to initialize this on demand instead of on every Count() - mutable std::optional
m_dt_std = std::nullopt; - - // Used to enable thread safety with the performance tracker - mutable std::shared_mutex m_mutex; + std::atomic
m_dt_avg = DT::zero(); // Uses Moving Average + std::atomic m_hz_avg = 0.0; // Uses Moving Average + Euler Average + std::atomic
m_dt_std = DT::zero(); };