mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-04-25 06:44:59 +00:00
PerformanceTracker: Use SPSCQueue and atomic to eliminate need for a mutex. Clean up some math.
This commit is contained in:
parent
c763961112
commit
46e0952e97
3 changed files with 93 additions and 113 deletions
|
@ -48,6 +48,7 @@ void PerformanceMetrics::CountPerformanceMarker(Core::System& system, s64 cycles
|
||||||
{
|
{
|
||||||
std::unique_lock lock(m_time_lock);
|
std::unique_lock lock(m_time_lock);
|
||||||
m_speed_counter.Count();
|
m_speed_counter.Count();
|
||||||
|
m_speed_counter.UpdateStats();
|
||||||
|
|
||||||
m_real_times[m_time_index] = Clock::now() - m_time_sleeping;
|
m_real_times[m_time_index] = Clock::now() - m_time_sleeping;
|
||||||
m_cpu_times[m_time_index] = system.GetCoreTiming().GetCPUTimePoint(cyclesLate);
|
m_cpu_times[m_time_index] = system.GetCoreTiming().GetCPUTimePoint(cyclesLate);
|
||||||
|
@ -84,6 +85,9 @@ double PerformanceMetrics::GetLastSpeedDenominator() const
|
||||||
|
|
||||||
void PerformanceMetrics::DrawImGuiStats(const float backbuffer_scale)
|
void PerformanceMetrics::DrawImGuiStats(const float backbuffer_scale)
|
||||||
{
|
{
|
||||||
|
m_vps_counter.UpdateStats();
|
||||||
|
m_fps_counter.UpdateStats();
|
||||||
|
|
||||||
const int movable_flag = Config::Get(Config::GFX_MOVABLE_PERFORMANCE_METRICS) ?
|
const int movable_flag = Config::Get(Config::GFX_MOVABLE_PERFORMANCE_METRICS) ?
|
||||||
ImGuiWindowFlags_None :
|
ImGuiWindowFlags_None :
|
||||||
ImGuiWindowFlags_NoMove;
|
ImGuiWindowFlags_NoMove;
|
||||||
|
|
|
@ -6,12 +6,12 @@
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <mutex>
|
|
||||||
|
|
||||||
#include <implot.h>
|
#include <implot.h>
|
||||||
|
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
#include "Common/FileUtil.h"
|
#include "Common/FileUtil.h"
|
||||||
|
#include "Common/MathUtil.h"
|
||||||
#include "Core/Core.h"
|
#include "Core/Core.h"
|
||||||
#include "VideoCommon/VideoConfig.h"
|
#include "VideoCommon/VideoConfig.h"
|
||||||
|
|
||||||
|
@ -21,14 +21,11 @@ static constexpr u64 MAX_QUALITY_GRAPH_SIZE = 1UL << 8;
|
||||||
|
|
||||||
PerformanceTracker::PerformanceTracker(const std::optional<std::string> log_name,
|
PerformanceTracker::PerformanceTracker(const std::optional<std::string> log_name,
|
||||||
const std::optional<DT> sample_window_duration)
|
const std::optional<DT> sample_window_duration)
|
||||||
: m_on_state_changed_handle{Core::AddOnStateChangedCallback([this](Core::State state) {
|
: m_log_name{log_name}, m_sample_window_duration{sample_window_duration}
|
||||||
if (state == Core::State::Paused)
|
|
||||||
SetPaused(true);
|
|
||||||
else if (state == Core::State::Running)
|
|
||||||
SetPaused(false);
|
|
||||||
})},
|
|
||||||
m_log_name{log_name}, m_sample_window_duration{sample_window_duration}
|
|
||||||
{
|
{
|
||||||
|
m_on_state_changed_handle =
|
||||||
|
Core::AddOnStateChangedCallback([this](Core::State state) { m_is_last_time_sane = false; });
|
||||||
|
|
||||||
Reset();
|
Reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -39,112 +36,110 @@ PerformanceTracker::~PerformanceTracker()
|
||||||
|
|
||||||
void PerformanceTracker::Reset()
|
void PerformanceTracker::Reset()
|
||||||
{
|
{
|
||||||
std::unique_lock lock{m_mutex};
|
m_raw_dts.Clear();
|
||||||
|
m_dt_queue.clear();
|
||||||
|
|
||||||
m_dt_total = DT::zero();
|
m_dt_total = DT::zero();
|
||||||
m_dt_queue.clear();
|
m_last_raw_dt = DT::zero();
|
||||||
m_last_time = Clock::now();
|
m_last_time = Clock::now();
|
||||||
m_hz_avg = 0.0;
|
m_hz_avg = 0.0;
|
||||||
m_dt_avg = DT::zero();
|
m_dt_avg = DT::zero();
|
||||||
m_dt_std = std::nullopt;
|
m_dt_std = DT::zero();
|
||||||
|
m_is_last_time_sane = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void PerformanceTracker::Count()
|
void PerformanceTracker::Count()
|
||||||
{
|
{
|
||||||
std::unique_lock lock{m_mutex};
|
const TimePoint current_time{Clock::now()};
|
||||||
|
|
||||||
if (m_paused)
|
const DT diff{current_time - m_last_time};
|
||||||
|
m_last_time = current_time;
|
||||||
|
|
||||||
|
if (!m_is_last_time_sane)
|
||||||
|
{
|
||||||
|
m_is_last_time_sane = true;
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const DT window{GetSampleWindow()};
|
m_last_raw_dt = diff;
|
||||||
|
m_raw_dts.Push(diff);
|
||||||
|
}
|
||||||
|
|
||||||
const TimePoint time{Clock::now()};
|
void PerformanceTracker::UpdateStats()
|
||||||
const DT diff{time - m_last_time};
|
{
|
||||||
|
DT diff{};
|
||||||
|
while (m_raw_dts.Pop(diff))
|
||||||
|
HandleRawDt(diff);
|
||||||
|
|
||||||
m_last_time = time;
|
// Update Std Dev
|
||||||
|
MathUtil::RunningVariance<double> variance;
|
||||||
|
for (auto dt : m_dt_queue)
|
||||||
|
variance.Push(DT_s(dt).count());
|
||||||
|
m_dt_std = std::chrono::duration_cast<DT>(DT_s(variance.PopulationStandardDeviation()));
|
||||||
|
}
|
||||||
|
|
||||||
|
void PerformanceTracker::HandleRawDt(DT diff)
|
||||||
|
{
|
||||||
|
if (m_dt_queue.size() == MAX_DT_QUEUE_SIZE)
|
||||||
|
PopBack();
|
||||||
|
|
||||||
PushFront(diff);
|
PushFront(diff);
|
||||||
|
|
||||||
if (m_dt_queue.size() == MAX_DT_QUEUE_SIZE)
|
const DT window{GetSampleWindow()};
|
||||||
PopBack();
|
|
||||||
|
|
||||||
while (m_dt_total - m_dt_queue.back() >= window)
|
while (m_dt_total - m_dt_queue.back() >= window)
|
||||||
PopBack();
|
PopBack();
|
||||||
|
|
||||||
// Simple Moving Average Throughout the Window
|
// Simple Moving Average Throughout the Window
|
||||||
m_dt_avg = m_dt_total / m_dt_queue.size();
|
const DT dt_avg = m_dt_total / m_dt_queue.size();
|
||||||
const double hz = DT_s(1.0) / m_dt_avg;
|
const double hz = DT_s(1.0) / dt_avg;
|
||||||
|
m_dt_avg = dt_avg;
|
||||||
|
|
||||||
// Exponential Moving Average
|
// Exponential Moving Average
|
||||||
const DT_s rc = SAMPLE_RC_RATIO * std::min(window, m_dt_total);
|
const DT_s rc = SAMPLE_RC_RATIO * std::min(window, m_dt_total);
|
||||||
const double a = 1.0 - std::exp(-(DT_s(diff) / rc));
|
const double a = 1.0 - std::exp(-(DT_s(diff) / rc));
|
||||||
|
|
||||||
// Sometimes euler averages can break when the average is inf/nan
|
// Sometimes euler averages can break when the average is inf/nan
|
||||||
if (std::isfinite(m_hz_avg))
|
const auto hz_avg = m_hz_avg.load();
|
||||||
m_hz_avg += a * (hz - m_hz_avg);
|
if (std::isfinite(hz_avg))
|
||||||
|
m_hz_avg = hz_avg + a * (hz - hz_avg);
|
||||||
else
|
else
|
||||||
m_hz_avg = hz;
|
m_hz_avg = hz;
|
||||||
|
|
||||||
m_dt_std = std::nullopt;
|
|
||||||
|
|
||||||
LogRenderTimeToFile(diff);
|
LogRenderTimeToFile(diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
DT PerformanceTracker::GetSampleWindow() const
|
DT PerformanceTracker::GetSampleWindow() const
|
||||||
{
|
{
|
||||||
// This reads a constant value and thus does not need a mutex
|
|
||||||
return m_sample_window_duration.value_or(
|
return m_sample_window_duration.value_or(
|
||||||
duration_cast<DT>(DT_us{std::max(1, g_ActiveConfig.iPerfSampleUSec)}));
|
duration_cast<DT>(DT_us{std::max(1, g_ActiveConfig.iPerfSampleUSec)}));
|
||||||
}
|
}
|
||||||
|
|
||||||
double PerformanceTracker::GetHzAvg() const
|
double PerformanceTracker::GetHzAvg() const
|
||||||
{
|
{
|
||||||
std::shared_lock lock{m_mutex};
|
|
||||||
return m_hz_avg;
|
return m_hz_avg;
|
||||||
}
|
}
|
||||||
|
|
||||||
DT PerformanceTracker::GetDtAvg() const
|
DT PerformanceTracker::GetDtAvg() const
|
||||||
{
|
{
|
||||||
std::shared_lock lock{m_mutex};
|
|
||||||
return m_dt_avg;
|
return m_dt_avg;
|
||||||
}
|
}
|
||||||
|
|
||||||
DT PerformanceTracker::GetDtStd() const
|
DT PerformanceTracker::GetDtStd() const
|
||||||
{
|
{
|
||||||
std::unique_lock lock{m_mutex};
|
return m_dt_std;
|
||||||
|
|
||||||
if (m_dt_std)
|
|
||||||
return *m_dt_std;
|
|
||||||
|
|
||||||
if (m_dt_queue.empty())
|
|
||||||
return *(m_dt_std = DT::zero());
|
|
||||||
|
|
||||||
double total = 0.0;
|
|
||||||
for (auto dt : m_dt_queue)
|
|
||||||
{
|
|
||||||
double diff = DT_s(dt - m_dt_avg).count();
|
|
||||||
total += diff * diff;
|
|
||||||
}
|
|
||||||
|
|
||||||
// This is a weighted standard deviation
|
|
||||||
return *(m_dt_std = std::chrono::duration_cast<DT>(DT_s(std::sqrt(total / m_dt_queue.size()))));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DT PerformanceTracker::GetLastRawDt() const
|
DT PerformanceTracker::GetLastRawDt() const
|
||||||
{
|
{
|
||||||
std::shared_lock lock{m_mutex};
|
return m_last_raw_dt;
|
||||||
|
|
||||||
if (m_dt_queue.empty())
|
|
||||||
return DT::zero();
|
|
||||||
|
|
||||||
return m_dt_queue.front();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void PerformanceTracker::ImPlotPlotLines(const char* label) const
|
void PerformanceTracker::ImPlotPlotLines(const char* label) const
|
||||||
{
|
{
|
||||||
static std::array<float, MAX_DT_QUEUE_SIZE + 2> x, y;
|
// "quality" graph uses twice as many points.
|
||||||
|
static_assert(MAX_QUALITY_GRAPH_SIZE * 2 <= MAX_DT_QUEUE_SIZE);
|
||||||
std::shared_lock lock{m_mutex};
|
static std::array<float, MAX_DT_QUEUE_SIZE + 1> x, y;
|
||||||
|
|
||||||
if (m_dt_queue.empty())
|
if (m_dt_queue.empty())
|
||||||
return;
|
return;
|
||||||
|
@ -152,38 +147,32 @@ void PerformanceTracker::ImPlotPlotLines(const char* label) const
|
||||||
// Decides if there are too many points to plot using rectangles
|
// Decides if there are too many points to plot using rectangles
|
||||||
const bool quality = m_dt_queue.size() < MAX_QUALITY_GRAPH_SIZE;
|
const bool quality = m_dt_queue.size() < MAX_QUALITY_GRAPH_SIZE;
|
||||||
|
|
||||||
const DT update_time = Clock::now() - m_last_time;
|
std::size_t point_index = 0;
|
||||||
const float predicted_frame_time = DT_ms(std::max(update_time, m_dt_queue.front())).count();
|
const auto add_point = [&](DT dt, DT shift_x, float prev_ms) {
|
||||||
|
const float ms = DT_ms{dt}.count();
|
||||||
|
|
||||||
std::size_t points = 0;
|
|
||||||
if (quality)
|
if (quality)
|
||||||
{
|
{
|
||||||
x[points] = 0.f;
|
x[point_index] = prev_ms;
|
||||||
y[points] = predicted_frame_time;
|
y[point_index] = ms;
|
||||||
++points;
|
++point_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
x[points] = DT_ms(update_time).count();
|
x[point_index] = prev_ms + DT_ms{shift_x}.count();
|
||||||
y[points] = predicted_frame_time;
|
y[point_index] = ms;
|
||||||
++points;
|
++point_index;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Rightmost point.
|
||||||
|
const auto update_time = Clock::now() - m_last_time;
|
||||||
|
const auto predicted_frame_time = std::max(update_time, m_dt_queue.front());
|
||||||
|
add_point(predicted_frame_time, DT{}, 0);
|
||||||
|
|
||||||
|
// Other points, right to left.
|
||||||
for (auto dt : m_dt_queue)
|
for (auto dt : m_dt_queue)
|
||||||
{
|
add_point(dt, dt, x[point_index - 1]);
|
||||||
const float frame_time_ms = DT_ms(dt).count();
|
|
||||||
|
|
||||||
if (quality)
|
ImPlot::PlotLine(label, x.data(), y.data(), static_cast<int>(point_index));
|
||||||
{
|
|
||||||
x[points] = x[points - 1];
|
|
||||||
y[points] = frame_time_ms;
|
|
||||||
++points;
|
|
||||||
}
|
|
||||||
|
|
||||||
x[points] = x[points - 1] + frame_time_ms;
|
|
||||||
y[points] = frame_time_ms;
|
|
||||||
++points;
|
|
||||||
}
|
|
||||||
|
|
||||||
ImPlot::PlotLine(label, x.data(), y.data(), static_cast<int>(points));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void PerformanceTracker::PushFront(DT value)
|
void PerformanceTracker::PushFront(DT value)
|
||||||
|
@ -211,18 +200,3 @@ void PerformanceTracker::LogRenderTimeToFile(DT val)
|
||||||
|
|
||||||
m_bench_file << std::fixed << std::setprecision(8) << DT_ms(val).count() << std::endl;
|
m_bench_file << std::fixed << std::setprecision(8) << DT_ms(val).count() << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void PerformanceTracker::SetPaused(bool paused)
|
|
||||||
{
|
|
||||||
std::unique_lock lock{m_mutex};
|
|
||||||
|
|
||||||
m_paused = paused;
|
|
||||||
if (m_paused)
|
|
||||||
{
|
|
||||||
m_last_time = TimePoint::max();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
m_last_time = Clock::now();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -3,12 +3,13 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
#include <deque>
|
#include <deque>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <shared_mutex>
|
|
||||||
|
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
|
#include "Common/SPSCQueue.h"
|
||||||
|
|
||||||
class PerformanceTracker
|
class PerformanceTracker
|
||||||
{
|
{
|
||||||
|
@ -22,30 +23,30 @@ public:
|
||||||
PerformanceTracker(PerformanceTracker&&) = delete;
|
PerformanceTracker(PerformanceTracker&&) = delete;
|
||||||
PerformanceTracker& operator=(PerformanceTracker&&) = delete;
|
PerformanceTracker& operator=(PerformanceTracker&&) = delete;
|
||||||
|
|
||||||
// Functions for recording performance information
|
|
||||||
void Reset();
|
void Reset();
|
||||||
|
|
||||||
|
// Calls must come from the same thread.
|
||||||
|
// UpdateStats is expected to be called regularly to empty the SPSC queue.
|
||||||
|
void UpdateStats();
|
||||||
|
void ImPlotPlotLines(const char* label) const;
|
||||||
|
|
||||||
|
// May call from any thread, but not concurrently, not that you'd want to..
|
||||||
void Count();
|
void Count();
|
||||||
|
|
||||||
// Functions for reading performance information
|
// May call from any thread.
|
||||||
DT GetSampleWindow() const;
|
DT GetSampleWindow() const;
|
||||||
|
|
||||||
double GetHzAvg() const;
|
double GetHzAvg() const;
|
||||||
|
|
||||||
DT GetDtAvg() const;
|
DT GetDtAvg() const;
|
||||||
DT GetDtStd() const;
|
DT GetDtStd() const;
|
||||||
|
|
||||||
DT GetLastRawDt() const;
|
DT GetLastRawDt() const;
|
||||||
|
|
||||||
void ImPlotPlotLines(const char* label) const;
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void LogRenderTimeToFile(DT val);
|
void LogRenderTimeToFile(DT val);
|
||||||
void SetPaused(bool paused);
|
|
||||||
|
|
||||||
|
void HandleRawDt(DT value);
|
||||||
void PushFront(DT value);
|
void PushFront(DT value);
|
||||||
void PopBack();
|
void PopBack();
|
||||||
|
|
||||||
bool m_paused = false;
|
|
||||||
int m_on_state_changed_handle;
|
int m_on_state_changed_handle;
|
||||||
|
|
||||||
// Name of log file and file stream
|
// Name of log file and file stream
|
||||||
|
@ -54,6 +55,12 @@ private:
|
||||||
|
|
||||||
// Last time Count() was called
|
// Last time Count() was called
|
||||||
TimePoint m_last_time;
|
TimePoint m_last_time;
|
||||||
|
std::atomic<bool> m_is_last_time_sane = false;
|
||||||
|
|
||||||
|
// Push'd from Count()
|
||||||
|
// and Pop'd from UpdateStats()
|
||||||
|
Common::SPSCQueue<DT, false> m_raw_dts;
|
||||||
|
std::atomic<DT> m_last_raw_dt = DT::zero();
|
||||||
|
|
||||||
// Amount of time to sample dt's over (defaults to config)
|
// Amount of time to sample dt's over (defaults to config)
|
||||||
const std::optional<DT> m_sample_window_duration;
|
const std::optional<DT> m_sample_window_duration;
|
||||||
|
@ -63,12 +70,7 @@ private:
|
||||||
std::deque<DT> m_dt_queue;
|
std::deque<DT> m_dt_queue;
|
||||||
|
|
||||||
// Average rate/time throughout the window
|
// Average rate/time throughout the window
|
||||||
DT m_dt_avg = DT::zero(); // Uses Moving Average
|
std::atomic<DT> m_dt_avg = DT::zero(); // Uses Moving Average
|
||||||
double m_hz_avg = 0.0; // Uses Moving Average + Euler Average
|
std::atomic<double> m_hz_avg = 0.0; // Uses Moving Average + Euler Average
|
||||||
|
std::atomic<DT> m_dt_std = DT::zero();
|
||||||
// Used to initialize this on demand instead of on every Count()
|
|
||||||
mutable std::optional<DT> m_dt_std = std::nullopt;
|
|
||||||
|
|
||||||
// Used to enable thread safety with the performance tracker
|
|
||||||
mutable std::shared_mutex m_mutex;
|
|
||||||
};
|
};
|
||||||
|
|
Loading…
Add table
Reference in a new issue