GCC Code Coverage Report


Directory: ./
Coverage: low: ≥ 0% medium: ≥ 75.0% high: ≥ 90.0%
Coverage Exec / Excl / Total
Lines: 100.0% 11 / 0 / 11
Functions: 100.0% 5 / 0 / 5
Branches: -% 0 / 0 / 0

include/DetourModKit/profiler.hpp
Line Branch Exec Source
1 #ifndef DETOURMODKIT_PROFILER_HPP
2 #define DETOURMODKIT_PROFILER_HPP
3
4 /**
5 * @file profiler.hpp
6 * @brief Opt-in profiling instrumentation for measuring hook and subsystem timing.
7 *
8 * @details Provides zero-overhead profiling when disabled at compile time.
9 * When enabled via DMK_ENABLE_PROFILING, records scoped timing samples
10 * into a lock-free ring buffer and exports to Chrome Tracing JSON format
11 * (viewable in chrome://tracing or https://ui.perfetto.dev).
12 *
13 * **Compile-time control:**
14 * - Define DMK_ENABLE_PROFILING before including this header, or
15 * - Pass -DDMK_ENABLE_PROFILING=ON to CMake.
16 *
17 * **Performance characteristics (when enabled):**
18 * - ~50 ns per scoped measurement (two QPC calls + one atomic store)
19 * - Fixed-size ring buffer (no heap allocations on the hot path)
20 * - Lock-free recording from multiple threads
21 *
22 * **Usage:**
23 * @code
24 * void on_camera_update(void* camera_ptr) {
25 * DMK_PROFILE_SCOPE("camera_update");
26 * // ... hook logic ...
27 * }
28 *
29 * // Export after a profiling session
30 * DetourModKit::Profiler::get_instance().export_to_file("profile.json");
31 * @endcode
32 */
33
34 #include <atomic>
35 #include <cstddef>
36 #include <cstdint>
37 #include <memory>
38 #include <string>
39 #include <string_view>
40
#if defined(DMK_ENABLE_PROFILING)

// Token-pasting helpers. DMK_CONCAT forwards to DMK_CONCAT_IMPL so that an
// argument such as __LINE__ is macro-expanded first and only then pasted.
#define DMK_CONCAT_IMPL(lhs, rhs) lhs##rhs
#define DMK_CONCAT(lhs, rhs) DMK_CONCAT_IMPL(lhs, rhs)

// Declares a ::DetourModKit::ScopedProfile local whose identifier is made
// unique per source line, brace-initialized from `label`.
//
// Lifetime contract: the storage behind `label` has to outlive the process.
// The pointer is kept unchanged in the ring buffer and is read later,
// asynchronously, by the export methods. String literals meet this
// requirement automatically.
//
// What the compiler does and does not check: the
// ScopedProfile(const char (&)[N]) constructor refuses decayed
// `const char *` / `char *` arguments (see the static_asserts in
// test_profiler.cpp). Array-reference binding still accepts ANY array,
// a function-local `char buf[N]` included, so static-storage lifetime stays
// the caller's responsibility. Prefer string literals or namespace-scope
// `static constexpr char` arrays.
#define DMK_PROFILE_SCOPE(label) \
    ::DetourModKit::ScopedProfile DMK_CONCAT(dmk_scoped_profile_, __LINE__) { label }

// Same as DMK_PROFILE_SCOPE, labelled with the enclosing function's name.
// `__func__` is a static-storage array per [dcl.fct.def.general]/8, so it
// binds to the array-reference constructor and the stored pointer stays
// valid for the lifetime of the process.
#define DMK_PROFILE_FUNCTION() \
    ::DetourModKit::ScopedProfile DMK_CONCAT(dmk_scoped_profile_func_, __LINE__) { __func__ }

#else // profiling disabled

// With profiling compiled out, both macros collapse to a no-op expression.
#define DMK_PROFILE_SCOPE(label) ((void)0)
#define DMK_PROFILE_FUNCTION() ((void)0)

#endif // DMK_ENABLE_PROFILING
73
74 namespace DetourModKit
75 {
/**
 * @brief One timing record stored in the profiler's ring buffer.
 *
 * @details Writers and readers coordinate through @ref sequence using an
 *          odd/even protocol: record() publishes an odd value before it
 *          touches the other fields and an even value once they are
 *          written. A reader that observes an odd value is looking at a
 *          torn or in-progress write and skips the sample.
 *
 *          Samples are neither copyable nor movable.
 */
struct ProfileSample
{
    /// Write-state marker: odd = write in progress, even = committed.
    std::atomic<std::uint32_t> sequence{0};

    /**
     * @brief Borrowed (non-owning) pointer to the sample's name.
     * @note The pointed-to string must outlive the process, e.g. a string
     *       literal or a namespace-scope `static constexpr char` array.
     *       The ScopedProfile array-reference constructor only rejects
     *       pointer decay; it does NOT verify static storage.
     */
    const char *name{nullptr};

    /// QPC tick count captured at scope entry.
    std::int64_t start_ticks{0};

    /// Elapsed time in microseconds (a 32-bit value tops out at ~71 minutes).
    std::uint32_t duration_us{0};

    /// Win32 thread ID of the thread that recorded the sample.
    std::uint32_t thread_id{0};

    /// Default construction zero-initializes every field via the initializers above.
    ProfileSample() noexcept = default;

    // Copying and moving are both disabled.
    ProfileSample(const ProfileSample &) = delete;
    ProfileSample(ProfileSample &&) = delete;
    ProfileSample &operator=(const ProfileSample &) = delete;
    ProfileSample &operator=(ProfileSample &&) = delete;
};
105
106 /**
107 * @brief Lock-free ring buffer profiler with Chrome Tracing JSON export.
108 *
109 * @details Uses a fixed-capacity power-of-2 ring buffer. Recording is lock-free
110 * via a single atomic fetch_add on the write position. When the buffer
111 * wraps, oldest samples are silently overwritten (no allocation, no lock).
112 *
113 * The profiler is a singleton. All public methods are safe to call from
114 * multiple threads. Export methods take a consistent snapshot by reading
115 * the current write position and walking backwards.
116 *
117 * **Thread safety:**
118 * - `record()`: lock-free (atomic fetch_add + sequence counter)
119 * - `reset()`: safe when no concurrent `record()` calls are in flight
120 * - `export_chrome_json()` / `export_to_file()`: safe to call concurrently
121 * with `record()`. Uses odd/even sequence protocol to skip in-flight
122 * writes, preventing torn reads in the exported data
123 */
124 class Profiler
125 {
126 public:
127 /// Default ring buffer capacity (must be a power of 2).
128 static constexpr size_t DEFAULT_CAPACITY{65536};
129
130 Profiler(const Profiler &) = delete;
131 Profiler &operator=(const Profiler &) = delete;
132 Profiler(Profiler &&) = delete;
133 Profiler &operator=(Profiler &&) = delete;
134
135 /// Returns the global profiler singleton.
136 [[nodiscard]] static Profiler &get_instance() noexcept;
137
138 /**
139 * @brief Records a completed timing sample.
140 * @param name Non-owning pointer that must outlive the process. The
141 * pointer is stored as-is in the ring buffer and read
142 * asynchronously by export methods. Passing a pointer whose
143 * storage is released before process exit (std::string::c_str(),
144 * heap buffers, function-local arrays) is undefined behavior.
145 * Neither this entry point nor the ScopedProfile(const char
146 * (&)[N]) constructor enforces static-storage at compile time;
147 * array-reference binding accepts any array, so callers remain
148 * responsible for lifetime. Safe sources: string literals,
149 * `static constexpr char` arrays at namespace scope, and
150 * `__func__` (see [dcl.fct.def.general]/8).
151 * @param start_ticks QPC tick count at scope entry.
152 * @param end_ticks QPC tick count at scope exit.
153 * @param thread_id Win32 thread ID of the recording thread.
154 * @note Lock-free. Safe to call from any thread at any time.
155 */
156 void record(const char *name, int64_t start_ticks, int64_t end_ticks,
157 uint32_t thread_id) noexcept;
158
159 /**
160 * @brief Resets the profiler, discarding all recorded samples.
161 * @note Not safe to call while other threads are calling record().
162 * Intended for use between profiling sessions.
163 */
164 void reset() noexcept;
165
166 /**
167 * @brief Exports recorded samples as a Chrome Tracing JSON string.
168 * @details Output conforms to the Chrome Trace Event Format (array form).
169 * Open the result in chrome://tracing or https://ui.perfetto.dev.
170 * @return JSON string containing all recorded samples.
171 */
172 [[nodiscard]] std::string export_chrome_json() const;
173
174 /**
175 * @brief Exports recorded samples to a JSON file on disk.
176 * @param path File path to write (created or overwritten).
177 * @return true on success, false on I/O failure.
178 */
179 [[nodiscard]] bool export_to_file(std::string_view path) const;
180
181 /// Returns the number of samples recorded (may exceed capacity due to wrapping).
182 [[nodiscard]] size_t total_samples_recorded() const noexcept;
183
184 /// Returns the number of valid samples available for export (min of recorded, capacity).
185 [[nodiscard]] size_t available_samples() const noexcept;
186
187 /// Returns the ring buffer capacity.
188 [[nodiscard]] size_t capacity() const noexcept;
189
190 /// Returns the QPC frequency (ticks per second) used for timing.
191 [[nodiscard]] int64_t qpc_frequency() const noexcept;
192
193 private:
194 Profiler();
195 1 ~Profiler() = default;
196
197 // write_pos_ first to avoid 40 bytes of padding (alignas(64) requirement).
198 // This placement ensures cache-line alignment for the lock-free ring buffer.
199 alignas(64) std::atomic<size_t> write_pos_{0};
200 std::unique_ptr<ProfileSample[]> buffer_;
201 size_t capacity_;
202 size_t mask_; // capacity_ - 1 for power-of-2 index wrapping
203 int64_t qpc_frequency_{0};
204 };
205
/**
 * @brief RAII timer for a single scope.
 *
 * @details Per the class contract, the constructor captures the QPC tick
 *          count and the thread ID; the destructor computes the duration and
 *          records the sample in the global Profiler ring buffer. Both are
 *          defined out of line.
 *
 *          The class is only put to use when DMK_ENABLE_PROFILING is
 *          defined. Prefer the DMK_PROFILE_SCOPE() macro over constructing
 *          one by hand.
 *
 *          Instances are neither copyable nor movable.
 */
class ScopedProfile
{
    /// Private tag that routes the public constructor to the pointer-taking overload.
    struct literal_tag
    {
    };

public:
    /**
     * @brief Opens a profiling scope labelled with @p name.
     * @tparam N Deduced array length (counts the trailing null terminator
     *           when the argument is a string literal).
     * @param name Reference to a `const char` array. Because the parameter
     *             is an array reference, decayed pointer sources
     *             (`std::string::c_str()`, `const char *` function
     *             arguments, `char *` buffers) do not bind and fail to
     *             compile. Arrays with automatic storage
     *             (e.g. `char buf[N] = "...";` inside a function) DO bind,
     *             and the stored pointer dangles once that scope exits.
     *             Static storage is therefore NOT proven by this overload;
     *             the caller must guarantee the array outlives the process.
     *             Safe sources: string literals, namespace-scope
     *             `static constexpr char` arrays, and `__func__`
     *             (static-storage per [dcl.fct.def.general]/8).
     * @note Only the address of the first element is forwarded to the
     *       private constructor; the characters are not copied here.
     */
    template <std::size_t N>
    explicit ScopedProfile(const char (&name)[N]) noexcept
        : ScopedProfile(&name[0], literal_tag{})
    {
    }

    /// Closes the scope (defined out of line).
    ~ScopedProfile() noexcept;

    // Copying and moving are both disabled.
    ScopedProfile(const ScopedProfile &) = delete;
    ScopedProfile(ScopedProfile &&) = delete;
    ScopedProfile &operator=(const ScopedProfile &) = delete;
    ScopedProfile &operator=(ScopedProfile &&) = delete;

private:
    /// Delegation target for the public constructor (defined out of line).
    ScopedProfile(const char *name, literal_tag) noexcept;

    const char *name_;         ///< Borrowed label pointer; never owned.
    std::int64_t start_ticks_; ///< Tick count taken at scope entry.
    std::uint32_t thread_id_;  ///< ID of the thread that opened the scope.
};
262
263 } // namespace DetourModKit
264
265 #endif // DETOURMODKIT_PROFILER_HPP
266