GCC Code Coverage Report


Directory: ./
Coverage: low: ≥ 0% medium: ≥ 75.0% high: ≥ 90.0%
Coverage Exec / Excl / Total
Lines: 94.7% 124 / 0 / 131
Functions: 92.9% 13 / 0 / 14
Branches: 64.1% 59 / 0 / 92

src/profiler.cpp
Line Branch Exec Source
1 /**
2 * @file profiler.cpp
3 * @brief Implementation of the lock-free ring buffer profiler with Chrome Tracing export.
4 */
5
6 #include "DetourModKit/profiler.hpp"
7
8 #include <windows.h>
9 #include <algorithm>
10 #include <cstdio>
11 #include <format>
12 #include <memory>
13 #include <string>
14 #include <string_view>
15
16 namespace DetourModKit
17 {
18 namespace
19 {
20 /**
21 * @brief Escapes a string for safe embedding in a JSON value.
22 * @details Handles the characters that are special in JSON strings:
23 * backslash, double quote, and control characters (U+0000..U+001F).
24 * Forward slash is NOT escaped (legal unescaped in JSON per RFC 8259).
25 * @param input The raw string to escape.
26 * @return A JSON-safe escaped string (without surrounding quotes).
27 */
28 65547 std::string escape_json_string(std::string_view input)
29 {
30 65547 std::string out;
31
1/2
✓ Branch 4 → 5 taken 65547 times.
✗ Branch 4 → 33 not taken.
65547 out.reserve(input.size());
32
2/2
✓ Branch 29 → 7 taken 1114224 times.
✓ Branch 29 → 30 taken 65547 times.
1179771 for (const char c : input)
33 {
34
8/8
✓ Branch 7 → 8 taken 1 time.
✓ Branch 7 → 10 taken 1 time.
✓ Branch 7 → 12 taken 1 time.
✓ Branch 7 → 14 taken 1 time.
✓ Branch 7 → 16 taken 1 time.
✓ Branch 7 → 18 taken 1 time.
✓ Branch 7 → 20 taken 1 time.
✓ Branch 7 → 22 taken 1114217 times.
1114224 switch (c)
35 {
36 1 case '"':
37
1/2
✓ Branch 8 → 9 taken 1 time.
✗ Branch 8 → 33 not taken.
1 out += "\\\"";
38 1 break;
39 1 case '\\':
40
1/2
✓ Branch 10 → 11 taken 1 time.
✗ Branch 10 → 33 not taken.
1 out += "\\\\";
41 1 break;
42 1 case '\b':
43
1/2
✓ Branch 12 → 13 taken 1 time.
✗ Branch 12 → 33 not taken.
1 out += "\\b";
44 1 break;
45 1 case '\f':
46
1/2
✓ Branch 14 → 15 taken 1 time.
✗ Branch 14 → 33 not taken.
1 out += "\\f";
47 1 break;
48 1 case '\n':
49
1/2
✓ Branch 16 → 17 taken 1 time.
✗ Branch 16 → 33 not taken.
1 out += "\\n";
50 1 break;
51 1 case '\r':
52
1/2
✓ Branch 18 → 19 taken 1 time.
✗ Branch 18 → 33 not taken.
1 out += "\\r";
53 1 break;
54 1 case '\t':
55
1/2
✓ Branch 20 → 21 taken 1 time.
✗ Branch 20 → 33 not taken.
1 out += "\\t";
56 1 break;
57 1114217 default:
58
2/2
✓ Branch 22 → 23 taken 1 time.
✓ Branch 22 → 26 taken 1114216 times.
1114217 if (static_cast<unsigned char>(c) < 0x20)
59 {
60 // Control characters U+0000..U+001F require \uXXXX encoding
61 char buf[8];
62 1 std::snprintf(buf, sizeof(buf), "\\u%04x",
63
1/2
✓ Branch 23 → 24 taken 1 time.
✗ Branch 23 → 32 not taken.
1 static_cast<unsigned int>(static_cast<unsigned char>(c)));
64
1/2
✓ Branch 24 → 25 taken 1 time.
✗ Branch 24 → 32 not taken.
1 out += buf;
65 }
66 else
67 {
68
1/2
✓ Branch 26 → 27 taken 1114216 times.
✗ Branch 26 → 33 not taken.
1114216 out += c;
69 }
70 1114217 break;
71 }
72 }
73 65547 return out;
74 }
75 } // namespace
76
77 // --- Profiler ---
78
79 1 Profiler::Profiler()
80 1 : buffer_(std::make_unique<ProfileSample[]>(DEFAULT_CAPACITY)),
81 1 capacity_(DEFAULT_CAPACITY),
82 1 mask_(DEFAULT_CAPACITY - 1)
83 {
84 LARGE_INTEGER freq;
85
1/2
✓ Branch 4 → 5 taken 1 time.
✗ Branch 4 → 6 not taken.
1 QueryPerformanceFrequency(&freq);
86 1 qpc_frequency_ = freq.QuadPart;
87 1 }
88
89 1999 Profiler &Profiler::get_instance() noexcept
90 {
91
3/4
✓ Branch 2 → 3 taken 1 time.
✓ Branch 2 → 8 taken 1998 times.
✓ Branch 4 → 5 taken 1 time.
✗ Branch 4 → 8 not taken.
1999 static Profiler instance;
92 2040 return instance;
93 }
94
95 578729 void Profiler::record(const char *name, int64_t start_ticks, int64_t end_ticks,
96 uint32_t thread_id) noexcept
97 {
98 578729 const int64_t delta_ticks = end_ticks - start_ticks;
99
100 // Convert ticks to microseconds: (delta * 1'000'000) / frequency.
101 // Use 64-bit intermediate to avoid overflow for deltas up to ~922,000 seconds
102 // (about 10.7 days) at a 10 MHz QPC frequency (common on modern hardware).
103 const auto duration_us = static_cast<uint32_t>(
104 1167424 std::min<int64_t>((delta_ticks * 1'000'000) / qpc_frequency_,
105 578729 static_cast<int64_t>(UINT32_MAX)));
106
107 588695 const size_t idx = write_pos_.fetch_add(1, std::memory_order_relaxed) & mask_;
108
109 588695 auto &sample = buffer_[idx];
110
111 // Open the write window with a monotonic increment. The result is
112 // guaranteed odd because every closed sequence is even (sequence
113 // starts at 0 in the constructor and reset(), and each record()
114 // contributes exactly +2). Using fetch_add avoids the load-then-
115 // store RMW pattern: a producer preempted between a relaxed load
116 // and its first store could otherwise roll the slot's sequence
117 // backwards if another producer completed a full write on the
118 // same slot in the interim. fetch_add forbids that rollback.
119 //
120 // Design note: if a writer is stalled between its fetch_add and
121 // its final sequence store, and 65536 intervening record() calls
122 // advance write_pos_ past a full buffer wrap, a new writer will
123 // land on the same slot and clobber the stalled writer's data.
124 // This requires the stalled writer to be preempted for the
125 // duration of an entire ring buffer cycle, which is unreachable
126 // at game-modding thread counts and frame rates. We accept this
127 // theoretical imprecision to keep the hot path to three fetch_adds
128 // (slot claim, open, close) plus the field stores, with no CAS retry loop.
129 //
130 // Monotonicity is unconditionally guaranteed by fetch_add: per
131 // [atomics.types.operations] the counter cannot roll backwards
132 // regardless of how many producers race on the same slot. Do NOT
133 // replace this with a load-then-store RMW: that would re-introduce
134 // the stale-publish race on wrap collision that this protocol
135 // exists to prevent.
136 static_assert(std::atomic<uint32_t>::is_always_lock_free,
137 "sequence counter must be lock-free for the seqlock protocol");
138 526719 (void)sample.sequence.fetch_add(1, std::memory_order_acq_rel);
139
140 526719 sample.name = name;
141 526719 sample.start_ticks = start_ticks;
142 526719 sample.duration_us = duration_us;
143 526719 sample.thread_id = thread_id;
144
145 // Close the write window. Another +1 keeps the slot's sequence
146 // monotonic and lands it on an even value, signalling a fully
147 // committed sample. Readers that observe an odd value skip this
148 // slot to avoid reading torn fields.
149 526719 (void)sample.sequence.fetch_add(1, std::memory_order_release);
150 526719 }
151
152 // Caller must ensure no concurrent record() calls are in flight.
153 // There is no runtime guard because adding an atomic "recording active"
154 // counter would penalize every record() call on the hot path for a
155 // contract that is only relevant during session boundaries.
156 49 void Profiler::reset() noexcept
157 {
158 49 write_pos_.store(0, std::memory_order_relaxed);
159
2/2
✓ Branch 21 → 11 taken 3211264 times.
✓ Branch 21 → 22 taken 49 times.
3211313 for (size_t i = 0; i < capacity_; ++i)
160 {
161 3211264 auto &s = buffer_[i];
162 3211264 s.sequence.store(0, std::memory_order_relaxed);
163 3211264 s.name = nullptr;
164 3211264 s.start_ticks = 0;
165 3211264 s.duration_us = 0;
166 3211264 s.thread_id = 0;
167 }
168 49 }
169
170 13 std::string Profiler::export_chrome_json() const
171 {
172 13 const size_t total = write_pos_.load(std::memory_order_relaxed);
173 13 const size_t count = std::min(total, capacity_);
174
175
2/2
✓ Branch 10 → 11 taken 2 times.
✓ Branch 10 → 16 taken 11 times.
13 if (count == 0)
176 {
177
1/2
✓ Branch 13 → 14 taken 2 times.
✗ Branch 13 → 50 not taken.
4 return "[]";
178 }
179
180 // Determine start index: if the buffer has wrapped, start from the
181 // oldest surviving sample; otherwise start from 0.
182
2/2
✓ Branch 16 → 17 taken 1 time.
✓ Branch 16 → 18 taken 10 times.
11 const size_t start_idx = (total > capacity_) ? (total & mask_) : 0;
183
184 // Pre-allocate: ~120 bytes per JSON event is a reasonable estimate.
185 11 std::string json;
186
1/2
✓ Branch 20 → 21 taken 11 times.
✗ Branch 20 → 61 not taken.
11 json.reserve(count * 120 + 4);
187
1/2
✓ Branch 21 → 22 taken 11 times.
✗ Branch 21 → 61 not taken.
11 json += "[\n";
188
189 // QPC frequency for converting start_ticks to microseconds
190 11 const double ticks_to_us = 1'000'000.0 / static_cast<double>(qpc_frequency_);
191
192 11 bool first = true;
193
2/2
✓ Branch 44 → 23 taken 65547 times.
✓ Branch 44 → 45 taken 11 times.
65558 for (size_t i = 0; i < count; ++i)
194 {
195 65547 const auto &s = buffer_[(start_idx + i) & mask_];
196
197 // Single pre-read sequence check: skip if odd (in-flight write).
198 // A full seqlock would re-check after reading fields to detect
199 // writes that started mid-read, but we intentionally omit the
200 // post-read re-check to avoid a second atomic load per sample
201 // on the export path. The resulting race window is narrow
202 // (a write must start between the sequence load and the field
203 // reads) and benign -- a stale-but-consistent sample may appear
204 // in the export at worst. Same trade-off as InputPoller's
205 // relaxed active_states_ reads (stale by one cycle is acceptable).
206 65547 const uint32_t seq = s.sequence.load(std::memory_order_acquire);
207
2/4
✓ Branch 31 → 32 taken 65547 times.
✗ Branch 31 → 33 not taken.
✗ Branch 32 → 33 not taken.
✓ Branch 32 → 34 taken 65547 times.
65547 if ((seq & 1) != 0 || s.name == nullptr)
208 {
209 continue;
210 }
211
212
2/2
✓ Branch 34 → 35 taken 65536 times.
✓ Branch 34 → 36 taken 11 times.
65547 if (!first)
213 {
214
1/2
✓ Branch 35 → 36 taken 65536 times.
✗ Branch 35 → 60 not taken.
65536 json += ",\n";
215 }
216 65547 first = false;
217
218 // Chrome Trace Event Format: "X" = complete event (has duration).
219 // Escape the name to produce valid JSON even if the caller
220 // passes a string containing quotes or backslashes.
221 65547 const double ts = static_cast<double>(s.start_ticks) * ticks_to_us;
222
1/2
✓ Branch 37 → 38 taken 65547 times.
✗ Branch 37 → 53 not taken.
65547 const std::string escaped_name = escape_json_string(s.name);
223 65547 json += std::format(
224 R"({{"name":"{}","ph":"X","ts":{:.1f},"dur":{},"pid":1,"tid":{}}})",
225
2/4
✓ Branch 38 → 39 taken 65547 times.
✗ Branch 38 → 56 not taken.
✓ Branch 39 → 40 taken 65547 times.
✗ Branch 39 → 54 not taken.
65547 escaped_name, ts, s.duration_us, s.thread_id);
226 65547 }
227
228
1/2
✓ Branch 45 → 46 taken 11 times.
✗ Branch 45 → 61 not taken.
11 json += "\n]";
229 11 return json;
230 11 }
231
232 3 bool Profiler::export_to_file(std::string_view path) const
233 {
234
1/2
✓ Branch 2 → 3 taken 3 times.
✗ Branch 2 → 43 not taken.
3 const std::string json = export_chrome_json();
235
1/2
✓ Branch 5 → 6 taken 3 times.
✗ Branch 5 → 34 not taken.
3 const std::string path_str(path);
236
237 const auto closer = [](std::FILE *f)
238 { std::fclose(f); };
239 3 std::FILE *file_ptr = nullptr;
240
241
1/2
✓ Branch 8 → 9 taken 3 times.
✗ Branch 8 → 39 not taken.
3 const errno_t err = fopen_s(&file_ptr, path_str.c_str(), "wb");
242
3/4
✓ Branch 9 → 10 taken 2 times.
✓ Branch 9 → 11 taken 1 time.
✗ Branch 10 → 11 not taken.
✓ Branch 10 → 12 taken 2 times.
3 if (err != 0 || file_ptr == nullptr)
243 {
244 1 return false;
245 }
246
247 2 std::unique_ptr<std::FILE, decltype(closer)> fp(file_ptr, closer);
248
1/2
✓ Branch 16 → 17 taken 2 times.
✗ Branch 16 → 37 not taken.
2 const size_t written = std::fwrite(json.data(), 1, json.size(), fp.get());
249
1/2
✗ Branch 18 → 19 not taken.
✓ Branch 18 → 20 taken 2 times.
2 if (written != json.size())
250 {
251 return false;
252 }
253
2/4
✓ Branch 21 → 22 taken 2 times.
✗ Branch 21 → 37 not taken.
✗ Branch 22 → 23 not taken.
✓ Branch 22 → 24 taken 2 times.
2 if (std::fflush(fp.get()) != 0)
254 {
255 return false;
256 }
257 // Release the pointer so unique_ptr does not double-close.
258
2/4
✓ Branch 25 → 26 taken 2 times.
✗ Branch 25 → 37 not taken.
✗ Branch 26 → 27 not taken.
✓ Branch 26 → 28 taken 2 times.
2 if (std::fclose(fp.release()) != 0)
259 {
260 return false;
261 }
262 2 return true;
263 3 }
264
265 13 size_t Profiler::total_samples_recorded() const noexcept
266 {
267 26 return write_pos_.load(std::memory_order_relaxed);
268 }
269
270 6 size_t Profiler::available_samples() const noexcept
271 {
272 12 return std::min(write_pos_.load(std::memory_order_relaxed), capacity_);
273 }
274
275 4 size_t Profiler::capacity() const noexcept
276 {
277 4 return capacity_;
278 }
279
280 1 int64_t Profiler::qpc_frequency() const noexcept
281 {
282 1 return qpc_frequency_;
283 }
284
285 // --- ScopedProfile ---
286
287 1954 ScopedProfile::ScopedProfile(const char *name, literal_tag) noexcept
288 1954 : name_(name), thread_id_(GetCurrentThreadId())
289 {
290 LARGE_INTEGER ticks;
291 1956 QueryPerformanceCounter(&ticks);
292 1953 start_ticks_ = ticks.QuadPart;
293 1953 }
294
295 1964 ScopedProfile::~ScopedProfile() noexcept
296 {
297 LARGE_INTEGER ticks;
298 1964 QueryPerformanceCounter(&ticks);
299 1958 Profiler::get_instance().record(name_, start_ticks_, ticks.QuadPart, thread_id_);
300 1972 }
301
302 } // namespace DetourModKit
303