Merge pull request #13398 from jordan-woyak/perf-tracker

PerformanceTracker: Eliminate mutex. General cleanups.
This commit is contained in:
JMC47 2025-03-23 15:21:11 -04:00 committed by GitHub
commit 28f1beeca8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 125 additions and 204 deletions

View file

@@ -47,6 +47,7 @@ void PerformanceMetrics::CountThrottleSleep(DT sleep)
void PerformanceMetrics::CountPerformanceMarker(Core::System& system, s64 cycles_late) void PerformanceMetrics::CountPerformanceMarker(Core::System& system, s64 cycles_late)
{ {
m_speed_counter.Count(); m_speed_counter.Count();
m_speed_counter.UpdateStats();
const auto ticks = system.GetCoreTiming().GetTicks() - cycles_late; const auto ticks = system.GetCoreTiming().GetTicks() - cycles_late;
const auto real_time = Clock::now() - m_time_sleeping; const auto real_time = Clock::now() - m_time_sleeping;
@@ -84,6 +85,9 @@ double PerformanceMetrics::GetMaxSpeed() const
void PerformanceMetrics::DrawImGuiStats(const float backbuffer_scale) void PerformanceMetrics::DrawImGuiStats(const float backbuffer_scale)
{ {
m_vps_counter.UpdateStats();
m_fps_counter.UpdateStats();
const int movable_flag = Config::Get(Config::GFX_MOVABLE_PERFORMANCE_METRICS) ? const int movable_flag = Config::Get(Config::GFX_MOVABLE_PERFORMANCE_METRICS) ?
ImGuiWindowFlags_None : ImGuiWindowFlags_None :
ImGuiWindowFlags_NoMove; ImGuiWindowFlags_NoMove;

View file

@@ -45,7 +45,7 @@ public:
private: private:
PerformanceTracker m_fps_counter{"render_times.txt"}; PerformanceTracker m_fps_counter{"render_times.txt"};
PerformanceTracker m_vps_counter{"vblank_times.txt"}; PerformanceTracker m_vps_counter{"vblank_times.txt"};
PerformanceTracker m_speed_counter{std::nullopt, 1000000}; PerformanceTracker m_speed_counter{std::nullopt, std::chrono::seconds{1}};
double m_graph_max_time = 0.0; double m_graph_max_time = 0.0;

View file

@@ -6,28 +6,26 @@
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
#include <iomanip> #include <iomanip>
#include <mutex>
#include <implot.h> #include <implot.h>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/FileUtil.h" #include "Common/FileUtil.h"
#include "Common/Timer.h" #include "Common/MathUtil.h"
#include "Core/Core.h" #include "Core/Core.h"
#include "VideoCommon/VideoConfig.h" #include "VideoCommon/VideoConfig.h"
static constexpr double SAMPLE_RC_RATIO = 0.25; static constexpr double SAMPLE_RC_RATIO = 0.25;
static constexpr u64 MAX_DT_QUEUE_SIZE = 1UL << 12;
static constexpr u64 MAX_QUALITY_GRAPH_SIZE = 1UL << 8;
PerformanceTracker::PerformanceTracker(const std::optional<std::string> log_name, PerformanceTracker::PerformanceTracker(const std::optional<std::string> log_name,
const std::optional<s64> sample_window_us) const std::optional<DT> sample_window_duration)
: m_on_state_changed_handle{Core::AddOnStateChangedCallback([this](Core::State state) { : m_log_name{log_name}, m_sample_window_duration{sample_window_duration}
if (state == Core::State::Paused)
SetPaused(true);
else if (state == Core::State::Running)
SetPaused(false);
})},
m_log_name{log_name}, m_sample_window_us{sample_window_us}
{ {
m_on_state_changed_handle =
Core::AddOnStateChangedCallback([this](Core::State state) { m_is_last_time_sane = false; });
Reset(); Reset();
} }
@@ -38,193 +36,155 @@ PerformanceTracker::~PerformanceTracker()
void PerformanceTracker::Reset() void PerformanceTracker::Reset()
{ {
std::unique_lock lock{m_mutex}; m_raw_dts.Clear();
m_dt_queue.clear();
QueueClear(); m_dt_total = DT::zero();
m_last_raw_dt = DT::zero();
m_last_time = Clock::now(); m_last_time = Clock::now();
m_hz_avg = 0.0; m_hz_avg = 0.0;
m_dt_avg = DT::zero(); m_dt_avg = DT::zero();
m_dt_std = std::nullopt; m_dt_std = DT::zero();
m_is_last_time_sane = false;
} }
void PerformanceTracker::Count() void PerformanceTracker::Count()
{ {
std::unique_lock lock{m_mutex}; const TimePoint current_time{Clock::now()};
if (m_paused) const DT diff{current_time - m_last_time};
m_last_time = current_time;
if (!m_is_last_time_sane)
{
m_is_last_time_sane = true;
return; return;
}
m_last_raw_dt = diff;
m_raw_dts.Push(diff);
}
void PerformanceTracker::UpdateStats()
{
DT diff{};
while (m_raw_dts.Pop(diff))
HandleRawDt(diff);
// Update Std Dev
MathUtil::RunningVariance<double> variance;
for (auto dt : m_dt_queue)
variance.Push(DT_s(dt).count());
m_dt_std = std::chrono::duration_cast<DT>(DT_s(variance.PopulationStandardDeviation()));
}
void PerformanceTracker::HandleRawDt(DT diff)
{
if (m_dt_queue.size() == MAX_DT_QUEUE_SIZE)
PopBack();
PushFront(diff);
const DT window{GetSampleWindow()}; const DT window{GetSampleWindow()};
const TimePoint time{Clock::now()}; while (m_dt_total - m_dt_queue.back() >= window)
const DT diff{time - m_last_time}; PopBack();
m_last_time = time;
QueuePush(diff);
m_dt_total += diff;
if (m_dt_queue_begin == m_dt_queue_end)
m_dt_total -= QueuePop();
while (window <= m_dt_total - QueueTop())
m_dt_total -= QueuePop();
// Simple Moving Average Throughout the Window // Simple Moving Average Throughout the Window
m_dt_avg = m_dt_total / QueueSize(); const DT dt_avg = m_dt_total / m_dt_queue.size();
const double hz = DT_s(1.0) / m_dt_avg; const double hz = DT_s(1.0) / dt_avg;
m_dt_avg = dt_avg;
// Exponential Moving Average // Exponential Moving Average
const DT_s rc = SAMPLE_RC_RATIO * std::min(window, m_dt_total); const DT_s rc = SAMPLE_RC_RATIO * std::min(window, m_dt_total);
const double a = 1.0 - std::exp(-(DT_s(diff) / rc)); const double a = 1.0 - std::exp(-(DT_s(diff) / rc));
// Sometimes euler averages can break when the average is inf/nan // Sometimes euler averages can break when the average is inf/nan
if (std::isfinite(m_hz_avg)) const auto hz_avg = m_hz_avg.load();
m_hz_avg += a * (hz - m_hz_avg); if (std::isfinite(hz_avg))
m_hz_avg = hz_avg + a * (hz - hz_avg);
else else
m_hz_avg = hz; m_hz_avg = hz;
m_dt_std = std::nullopt;
LogRenderTimeToFile(diff); LogRenderTimeToFile(diff);
} }
DT PerformanceTracker::GetSampleWindow() const DT PerformanceTracker::GetSampleWindow() const
{ {
// This reads a constant value and thus does not need a mutex return m_sample_window_duration.value_or(
return std::chrono::duration_cast<DT>( duration_cast<DT>(DT_us{std::max(1, g_ActiveConfig.iPerfSampleUSec)}));
DT_us(m_sample_window_us.value_or(std::max(1, g_ActiveConfig.iPerfSampleUSec))));
} }
double PerformanceTracker::GetHzAvg() const double PerformanceTracker::GetHzAvg() const
{ {
std::shared_lock lock{m_mutex};
return m_hz_avg; return m_hz_avg;
} }
DT PerformanceTracker::GetDtAvg() const DT PerformanceTracker::GetDtAvg() const
{ {
std::shared_lock lock{m_mutex};
return m_dt_avg; return m_dt_avg;
} }
DT PerformanceTracker::GetDtStd() const DT PerformanceTracker::GetDtStd() const
{ {
std::unique_lock lock{m_mutex}; return m_dt_std;
if (m_dt_std)
return *m_dt_std;
if (QueueEmpty())
return *(m_dt_std = DT::zero());
double total = 0.0;
for (std::size_t i = m_dt_queue_begin; i != m_dt_queue_end; i = IncrementIndex(i))
{
double diff = DT_s(m_dt_queue[i] - m_dt_avg).count();
total += diff * diff;
}
// This is a weighted standard deviation
return *(m_dt_std = std::chrono::duration_cast<DT>(DT_s(std::sqrt(total / QueueSize()))));
} }
DT PerformanceTracker::GetLastRawDt() const DT PerformanceTracker::GetLastRawDt() const
{ {
std::shared_lock lock{m_mutex}; return m_last_raw_dt;
if (QueueEmpty())
return DT::zero();
return QueueBottom();
} }
void PerformanceTracker::ImPlotPlotLines(const char* label) const void PerformanceTracker::ImPlotPlotLines(const char* label) const
{ {
static std::array<float, MAX_DT_QUEUE_SIZE + 2> x, y; // "quality" graph uses twice as many points.
static_assert(MAX_QUALITY_GRAPH_SIZE * 2 <= MAX_DT_QUEUE_SIZE);
static std::array<float, MAX_DT_QUEUE_SIZE + 1> x, y;
std::shared_lock lock{m_mutex}; if (m_dt_queue.empty())
if (QueueEmpty())
return; return;
// Decides if there are too many points to plot using rectangles // Decides if there are too many points to plot using rectangles
const bool quality = QueueSize() < MAX_QUALITY_GRAPH_SIZE; const bool quality = m_dt_queue.size() < MAX_QUALITY_GRAPH_SIZE;
const DT update_time = Clock::now() - m_last_time; std::size_t point_index = 0;
const float predicted_frame_time = DT_ms(std::max(update_time, QueueBottom())).count(); const auto add_point = [&](DT dt, DT shift_x, float prev_ms) {
const float ms = DT_ms{dt}.count();
std::size_t points = 0;
if (quality)
{
x[points] = 0.f;
y[points] = predicted_frame_time;
++points;
}
x[points] = DT_ms(update_time).count();
y[points] = predicted_frame_time;
++points;
const std::size_t begin = DecrementIndex(m_dt_queue_end);
const std::size_t end = DecrementIndex(m_dt_queue_begin);
for (std::size_t i = begin; i != end; i = DecrementIndex(i))
{
const float frame_time_ms = DT_ms(m_dt_queue[i]).count();
if (quality) if (quality)
{ {
x[points] = x[points - 1]; x[point_index] = prev_ms;
y[points] = frame_time_ms; y[point_index] = ms;
++points; ++point_index;
} }
x[points] = x[points - 1] + frame_time_ms; x[point_index] = prev_ms + DT_ms{shift_x}.count();
y[points] = frame_time_ms; y[point_index] = ms;
++points; ++point_index;
} };
ImPlot::PlotLine(label, x.data(), y.data(), static_cast<int>(points)); // Rightmost point.
const auto update_time = Clock::now() - m_last_time;
const auto predicted_frame_time = std::max(update_time, m_dt_queue.front());
add_point(predicted_frame_time, DT{}, 0);
// Other points, right to left.
for (auto dt : m_dt_queue)
add_point(dt, dt, x[point_index - 1]);
ImPlot::PlotLine(label, x.data(), y.data(), static_cast<int>(point_index));
} }
void PerformanceTracker::QueueClear() void PerformanceTracker::PushFront(DT value)
{ {
m_dt_total = DT::zero(); m_dt_queue.push_front(value);
m_dt_queue_begin = 0; m_dt_total += value;
m_dt_queue_end = 0;
} }
void PerformanceTracker::QueuePush(DT dt) void PerformanceTracker::PopBack()
{ {
m_dt_queue[m_dt_queue_end] = dt; m_dt_total -= m_dt_queue.back();
m_dt_queue_end = IncrementIndex(m_dt_queue_end); m_dt_queue.pop_back();
}
const DT& PerformanceTracker::QueuePop()
{
const std::size_t top = m_dt_queue_begin;
m_dt_queue_begin = IncrementIndex(m_dt_queue_begin);
return m_dt_queue[top];
}
const DT& PerformanceTracker::QueueTop() const
{
return m_dt_queue[m_dt_queue_begin];
}
const DT& PerformanceTracker::QueueBottom() const
{
return m_dt_queue[DecrementIndex(m_dt_queue_end)];
}
std::size_t PerformanceTracker::QueueSize() const
{
return GetDifference(m_dt_queue_begin, m_dt_queue_end);
}
bool PerformanceTracker::QueueEmpty() const
{
return m_dt_queue_begin == m_dt_queue_end;
} }
void PerformanceTracker::LogRenderTimeToFile(DT val) void PerformanceTracker::LogRenderTimeToFile(DT val)
@@ -240,18 +200,3 @@ void PerformanceTracker::LogRenderTimeToFile(DT val)
m_bench_file << std::fixed << std::setprecision(8) << DT_ms(val).count() << std::endl; m_bench_file << std::fixed << std::setprecision(8) << DT_ms(val).count() << std::endl;
} }
void PerformanceTracker::SetPaused(bool paused)
{
std::unique_lock lock{m_mutex};
m_paused = paused;
if (m_paused)
{
m_last_time = TimePoint::max();
}
else
{
m_last_time = Clock::now();
}
}

View file

@@ -3,39 +3,19 @@
#pragma once #pragma once
#include <array> #include <atomic>
#include <chrono> #include <deque>
#include <fstream> #include <fstream>
#include <optional> #include <optional>
#include <shared_mutex>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/SPSCQueue.h"
class PerformanceTracker class PerformanceTracker
{ {
private:
// Must be powers of 2 for masking to work
static constexpr u64 MAX_DT_QUEUE_SIZE = 1UL << 12;
static constexpr u64 MAX_QUALITY_GRAPH_SIZE = 1UL << 8;
static inline std::size_t IncrementIndex(const std::size_t index)
{
return (index + 1) & (MAX_DT_QUEUE_SIZE - 1);
}
static inline std::size_t DecrementIndex(const std::size_t index)
{
return (index - 1) & (MAX_DT_QUEUE_SIZE - 1);
}
static inline std::size_t GetDifference(const std::size_t begin, const std::size_t end)
{
return (end - begin) & (MAX_DT_QUEUE_SIZE - 1);
}
public: public:
PerformanceTracker(const std::optional<std::string> log_name = std::nullopt, PerformanceTracker(const std::optional<std::string> log_name = std::nullopt,
const std::optional<s64> sample_window_us = std::nullopt); const std::optional<DT> sample_window_duration = std::nullopt);
~PerformanceTracker(); ~PerformanceTracker();
PerformanceTracker(const PerformanceTracker&) = delete; PerformanceTracker(const PerformanceTracker&) = delete;
@@ -43,37 +23,30 @@ public:
PerformanceTracker(PerformanceTracker&&) = delete; PerformanceTracker(PerformanceTracker&&) = delete;
PerformanceTracker& operator=(PerformanceTracker&&) = delete; PerformanceTracker& operator=(PerformanceTracker&&) = delete;
// Functions for recording performance information
void Reset(); void Reset();
void Count();
// Functions for reading performance information
DT GetSampleWindow() const;
double GetHzAvg() const;
DT GetDtAvg() const;
DT GetDtStd() const;
DT GetLastRawDt() const;
// Calls must come from the same thread.
// UpdateStats is expected to be called regularly to empty the SPSC queue.
void UpdateStats();
void ImPlotPlotLines(const char* label) const; void ImPlotPlotLines(const char* label) const;
private: // Functions for managing dt queue // May call from any thread, but not concurrently, not that you'd want to..
inline void QueueClear(); void Count();
inline void QueuePush(DT dt);
inline const DT& QueuePop();
inline const DT& QueueTop() const;
inline const DT& QueueBottom() const;
std::size_t inline QueueSize() const; // May call from any thread.
bool inline QueueEmpty() const; DT GetSampleWindow() const;
double GetHzAvg() const;
DT GetDtAvg() const;
DT GetDtStd() const;
DT GetLastRawDt() const;
// Handle pausing and logging private:
void LogRenderTimeToFile(DT val); void LogRenderTimeToFile(DT val);
void SetPaused(bool paused);
bool m_paused = false; void HandleRawDt(DT value);
void PushFront(DT value);
void PopBack();
int m_on_state_changed_handle; int m_on_state_changed_handle;
// Name of log file and file stream // Name of log file and file stream
@@ -82,23 +55,22 @@ private: // Functions for managing dt queue
// Last time Count() was called // Last time Count() was called
TimePoint m_last_time; TimePoint m_last_time;
std::atomic<bool> m_is_last_time_sane = false;
// Push'd from Count()
// and Pop'd from UpdateStats()
Common::SPSCQueue<DT, false> m_raw_dts;
std::atomic<DT> m_last_raw_dt = DT::zero();
// Amount of time to sample dt's over (defaults to config) // Amount of time to sample dt's over (defaults to config)
const std::optional<s64> m_sample_window_us; const std::optional<DT> m_sample_window_duration;
// Queue + Running Total used to calculate average dt // Queue + Running Total used to calculate average dt
DT m_dt_total = DT::zero(); DT m_dt_total = DT::zero();
std::array<DT, MAX_DT_QUEUE_SIZE> m_dt_queue; std::deque<DT> m_dt_queue;
std::size_t m_dt_queue_begin = 0;
std::size_t m_dt_queue_end = 0;
// Average rate/time throughout the window // Average rate/time throughout the window
DT m_dt_avg = DT::zero(); // Uses Moving Average std::atomic<DT> m_dt_avg = DT::zero(); // Uses Moving Average
double m_hz_avg = 0.0; // Uses Moving Average + Euler Average std::atomic<double> m_hz_avg = 0.0; // Uses Moving Average + Euler Average
std::atomic<DT> m_dt_std = DT::zero();
// Used to initialize this on demand instead of on every Count()
mutable std::optional<DT> m_dt_std = std::nullopt;
// Used to enable thread safety with the performance tracker
mutable std::shared_mutex m_mutex;
}; };