GCC Code Coverage Report


Directory: ./
Coverage: low: ≥ 0% medium: ≥ 75.0% high: ≥ 90.0%
Coverage Exec / Excl / Total
Lines: 85.7% 12 / 0 / 14
Functions: 100.0% 3 / 0 / 3
Branches: 80.0% 4 / 0 / 5

include/DetourModKit/scanner.hpp
Line Branch Exec Source
1 #ifndef DETOURMODKIT_SCANNER_HPP
2 #define DETOURMODKIT_SCANNER_HPP
3
4 #include <array>
5 #include <vector>
6 #include <string>
7 #include <string_view>
8 #include <cstddef>
9 #include <cstdint>
10 #include <expected>
11 #include <limits>
12 #include <optional>
13 #include <span>
14
15 namespace DetourModKit
16 {
17 /**
18 * @enum RipResolveError
19 * @brief Error codes for RIP-relative resolution failures.
20 */
21 enum class RipResolveError
22 {
23 NullInput,
24 PrefixNotFound,
25 RegionTooSmall,
26 UnreadableDisplacement
27 };
28
29 /**
30 * @brief Converts a RipResolveError to a human-readable string.
31 * @param error The error code.
32 * @return A string view describing the error.
33 */
34 4 constexpr std::string_view rip_resolve_error_to_string(RipResolveError error) noexcept
35 {
36
4/5
✓ Branch 2 → 3 taken 1 time.
✓ Branch 2 → 4 taken 1 time.
✓ Branch 2 → 5 taken 1 time.
✓ Branch 2 → 6 taken 1 time.
✗ Branch 2 → 7 not taken.
4 switch (error)
37 {
38 1 case RipResolveError::NullInput:
39 1 return "Null input pointer";
40 1 case RipResolveError::PrefixNotFound:
41 1 return "Opcode prefix not found in search region";
42 1 case RipResolveError::RegionTooSmall:
43 1 return "Search region too small for prefix + displacement";
44 1 case RipResolveError::UnreadableDisplacement:
45 1 return "Displacement bytes at matched location are not readable";
46 default:
47 return "Unknown RIP resolve error";
48 }
49 }
50
51 namespace Scanner
52 {
53 /**
54 * @struct CompiledPattern
55 * @brief A pre-compiled AOB pattern with separate bytes and mask.
56 * @details Stores the pattern bytes and a parallel per-byte mask indicating which
57 * positions are wildcards (mask byte 0x00) vs. literal values to match (mask byte 0xFF).
58 * This design avoids sentinel byte conflicts (e.g., 0xCC is a valid byte).
59 */
60 struct CompiledPattern
61 {
62 /**
63 * @brief Pattern bytes, one per token in the source AOB string.
64 * @details Entries at wildcard positions (mask byte == 0x00) contain
65 * arbitrary values and must not be compared against memory.
66 */
67 std::vector<std::byte> bytes;
68
69 /**
70 * @brief Per-byte match mask paralleling @ref bytes.
71 * @details 0xFF marks a literal byte that must match exactly; 0x00
72 * marks a wildcard slot to skip. Sized identically to
73 * @ref bytes.
74 */
75 std::vector<std::byte> mask;
76
77 /**
78 * @brief Byte offset from pattern start to the point of interest.
79 * @details Set by the `|` marker in the AOB string, or 0 if absent.
80 * May equal bytes.size() when `|` appears at the end of the
81 * pattern. The offset is non-negative under the current
82 * parser (`|` cannot precede tokens), but the type is
83 * signed to match pointer-arithmetic conventions
84 * (C++ Core Guidelines ES.106) and to future-proof against
85 * negative anchors.
86 */
87 std::ptrdiff_t offset = 0;
88
89 /**
90 * @brief Cached anchor index selected by compile_anchor().
91 * @details find_pattern() drives its memchr sweep on the byte at
92 * this position. The index is the rarest literal byte in
93 * the pattern (lowest score in a small frequency table
94 * tuned for typical x64 .text sections), so a single
95 * memchr pass produces far fewer false candidate hits
96 * than anchoring on `bytes[0]` would.
97 *
98 * Sentinel values:
99 * - `[0, size())` valid anchor.
100 * - `size()` pattern has no literal bytes
101 * (all wildcards); scan
102 * degenerates to "match at start".
103 * - `>= size() + 1` anchor not yet selected;
104 * find_pattern() will pick one
105 * inline (slower path).
106 *
107 * parse_aob() always calls compile_anchor() before
108 * returning, so patterns produced through the public API
109 * enter find_pattern() with the cached anchor in place.
110 * Manually constructed patterns (assigning `bytes`/`mask`
111 * by hand) start in the "not yet selected" state and
112 * should call @ref compile_anchor() once after
113 * population if they will be scanned repeatedly.
114 */
115 std::size_t anchor = std::numeric_limits<std::size_t>::max();
116
117 /**
118 * @brief Returns the size of the pattern.
119 * @return size_t The number of bytes in the pattern.
120 */
121 848164 size_t size() const noexcept { return bytes.size(); }
122
123 /**
124 * @brief Checks if the pattern is empty.
125 * @return true if the pattern has no bytes.
126 */
127 189 bool empty() const noexcept { return bytes.empty(); }
128
129 /**
130 * @brief Selects and stores the rarest literal byte's index as the
131 * scan anchor.
132 * @details Walks the pattern once, scoring each literal byte
133 * against a small byte-frequency table (`0x00`, `0xCC`,
134 * `0x48`, ... receive high scores; uncommon bytes score
135 * 0), and stores the lowest-scoring index in @ref
136 * anchor. Ties are broken by first occurrence for
137 * deterministic behaviour. An all-wildcard pattern sets
138 * @ref anchor to `size()` so find_pattern() can take its
139 * degenerate "match at region start" path without a
140 * second scan.
141 *
142 * Safe to call repeatedly; the operation is idempotent
143 * and O(size()). Callers that mutate @ref bytes or
144 * @ref mask after a prior compile_anchor() MUST call it
145 * again before the next scan or the cached anchor will
146 * drift from the pattern contents.
147 *
148 * Not thread-safe with concurrent find_pattern() calls
149 * on the same CompiledPattern instance; sequence the
150 * compile step before publishing the pattern to scanners.
151 */
152 void compile_anchor() noexcept;
153 };
154
155 /**
156 * @brief Parses a space-separated AOB string into a compiled pattern.
157 * @details Converts hexadecimal tokens to byte values and wildcard tokens
158 * ('??' or '?') into wildcard entries (mask byte 0x00). An optional `|` token marks
159 * the offset within the pattern (stored in CompiledPattern::offset).
160 * This lets wider patterns precisely target a specific instruction:
161 * e.g., "48 8B 88 B8 00 00 00 | 48 89 4C 24 68" sets offset=7.
162 * @param aob_str The AOB pattern string.
163 * @return std::optional<CompiledPattern> The compiled pattern, or std::nullopt on parse failure.
164 */
165 [[nodiscard]] std::optional<CompiledPattern> parse_aob(std::string_view aob_str);
166
167 /**
168 * @brief Scans a specified memory region for a given byte pattern.
169 * @details Drives a memchr sweep on the pattern's anchor byte (the rarest literal byte;
170 * see CompiledPattern::anchor) for fast skipping, then verifies the full pattern.
171 * @param start_address Pointer to the beginning of the memory region to scan.
172 * @param region_size The size (in bytes) of the memory region to scan.
173 * @param pattern The compiled pattern to search for.
174 * @return const std::byte* Pointer to the match within the specified region,
175 * already adjusted by `pattern.offset`. Returns nullptr if pattern
176 * not found.
177 * @note A pattern with zero literal bytes (every token wildcarded) returns
178 * `start_address` (plus offset) and emits a warning through the shared
179 * Logger. This case almost always indicates a caller bug; the behaviour
180 * is preserved for backwards compatibility but should not be relied upon.
181 * @note `pattern.offset` (set by a `|` marker in the AOB string) is applied
182 * exactly once. When no marker is present `offset == 0` and the returned
183 * pointer is the match start. Callers must NOT add `pattern.offset`
184 * manually; doing so double-applies and will miss the intended byte.
185 * @warning When `pattern.offset == pattern.size()` (a trailing `|` marker),
186 * the returned pointer addresses one-past the matched range. Depending
187 * on where in the region the match landed, this may also be
188 * one-past the scanned region. The pointer is valid for arithmetic
189 * and bounds comparisons but MUST NOT be dereferenced without an
190 * explicit readability check (e.g. `Memory::is_readable`).
191 */
192 [[nodiscard]] const std::byte *find_pattern(const std::byte *start_address, size_t region_size,
193 const CompiledPattern &pattern);
194
195 /**
196 * @brief Scans a memory region for the Nth occurrence of a byte pattern.
197 * @param start_address Pointer to the beginning of the memory region to scan.
198 * @param region_size The size (in bytes) of the memory region to scan.
199 * @param pattern The compiled pattern to search for.
200 * @param occurrence Which occurrence to return (1-based). 1 = first match.
201 * Passing 0 returns nullptr.
202 * @return const std::byte* Pointer to the Nth occurrence (already adjusted
203 * by `pattern.offset`), or nullptr if fewer than N matches exist.
204 * @note Like the single-occurrence overload, `pattern.offset` is applied
205 * exactly once. Callers must NOT add it manually.
206 * @warning A trailing `|` marker produces a one-past pointer identical in
207 * kind to the single-occurrence overload; do not dereference
208 * without a bounds or readability check.
209 */
210 [[nodiscard]] const std::byte *find_pattern(const std::byte *start_address, size_t region_size,
211 const CompiledPattern &pattern, size_t occurrence);
212 // Common x86-64 RIP-relative opcode prefixes (bytes preceding the disp32 field)
213 inline constexpr std::array<std::byte, 3> PREFIX_MOV_RAX_RIP = {std::byte{0x48}, std::byte{0x8B}, std::byte{0x05}};
214 inline constexpr std::array<std::byte, 3> PREFIX_MOV_RCX_RIP = {std::byte{0x48}, std::byte{0x8B}, std::byte{0x0D}};
215 inline constexpr std::array<std::byte, 3> PREFIX_MOV_RDX_RIP = {std::byte{0x48}, std::byte{0x8B}, std::byte{0x15}};
216 inline constexpr std::array<std::byte, 3> PREFIX_MOV_RBX_RIP = {std::byte{0x48}, std::byte{0x8B}, std::byte{0x1D}};
217 inline constexpr std::array<std::byte, 3> PREFIX_LEA_RAX_RIP = {std::byte{0x48}, std::byte{0x8D}, std::byte{0x05}};
218 inline constexpr std::array<std::byte, 3> PREFIX_LEA_RCX_RIP = {std::byte{0x48}, std::byte{0x8D}, std::byte{0x0D}};
219 inline constexpr std::array<std::byte, 3> PREFIX_LEA_RDX_RIP = {std::byte{0x48}, std::byte{0x8D}, std::byte{0x15}};
220 inline constexpr std::array<std::byte, 1> PREFIX_CALL_REL32 = {std::byte{0xE8}};
221 inline constexpr std::array<std::byte, 1> PREFIX_JMP_REL32 = {std::byte{0xE9}};
222
223 /**
224 * @brief Resolves an absolute address from an x86-64 RIP-relative instruction.
225 * @details Extracts the int32 displacement at the given offset within the instruction
226 * and computes the absolute target: instruction_address + instruction_length + displacement.
227 * @param instruction_address Pointer to the first byte of the instruction.
228 * @param displacement_offset Byte offset from instruction_address to the disp32 field.
229 * @param instruction_length Total length of the instruction in bytes.
230 * @return The resolved absolute address, or RipResolveError on failure.
231 */
232 [[nodiscard]] std::expected<uintptr_t, RipResolveError> resolve_rip_relative(
233 const std::byte *instruction_address,
234 size_t displacement_offset,
235 size_t instruction_length);
236
237 /**
238 * @brief Scans forward from a starting address for an opcode prefix, then resolves the RIP-relative target.
239 * @details Searches up to search_length bytes for the given opcode prefix. Once found,
240 * the displacement is assumed to immediately follow the prefix. The absolute address
241 * is computed as: found_address + instruction_length + displacement.
242 * @param search_start Pointer to the beginning of the search region.
243 * @param search_length Maximum number of bytes to search forward.
244 * @param opcode_prefix The opcode byte sequence to search for (disp32 must follow immediately).
245 * @param instruction_length Total length of the instruction in bytes.
246 * @return The resolved absolute address, or RipResolveError describing the failure.
247 * @warning For indirect-call / indirect-jump forms (`FF 15 disp32`, `FF 25 disp32`)
248 * the returned address is the *pointer slot* (the address that stores
249 * the final target), not the target itself. Dereference it with
250 * `Memory::read_ptr_unsafe` (or an equivalent checked read) to obtain
251 * the callee / jump destination.
252 */
253 [[nodiscard]] std::expected<uintptr_t, RipResolveError> find_and_resolve_rip_relative(
254 const std::byte *search_start,
255 size_t search_length,
256 std::span<const std::byte> opcode_prefix,
257 size_t instruction_length);
258
259 /**
260 * @brief Scans all committed executable memory regions for a byte pattern.
261 * @details Walks the process address space via VirtualQuery, scanning each
262 * committed region with execute permission. Useful for games with
263 * packed or protected binaries that unpack code into anonymous pages
264 * outside any loaded module's address range.
265 * @param pattern The compiled pattern to search for.
266 * @param occurrence Which occurrence to return (1-based). 1 = first match.
267 * @return Pointer to the match (adjusted by pattern offset), or nullptr if not found.
268 * @note Pure-execute pages (`PAGE_EXECUTE` without any read bit) are skipped:
269 * they are not guaranteed readable and dereferencing them raises an
270 * access violation. Only `PAGE_EXECUTE_READ`, `PAGE_EXECUTE_READWRITE`,
271 * and `PAGE_EXECUTE_WRITECOPY` regions are inspected. Guard and
272 * no-access pages are skipped unconditionally.
273 * @note `pattern.offset` is applied to the returned pointer, matching
274 * `find_pattern`. Callers must not add it manually.
275 * @warning A trailing `|` marker (offset == pattern.size()) yields a
276 * one-past pointer; bounds-check before dereferencing.
277 * @note A pattern that straddles a region boundary (e.g. two separately
278 * allocated `PAGE_EXECUTE_READ` regions that happen to be adjacent)
279 * will not be found: each region is scanned independently. PE-loaded
280 * code does not cross section boundaries so normal module scanning is
281 * unaffected, but JIT-compiled code (Mono, Unreal AngelScript) or
282 * heavily unpacked payloads may split contiguous bytes across VAD
283 * entries.
284 */
285 [[nodiscard]] const std::byte *scan_executable_regions(const CompiledPattern &pattern, size_t occurrence = 1);
286
287 /**
288 * @enum ResolveMode
289 * @brief How a cascade candidate's pattern maps to a final address.
290 */
291 enum class ResolveMode : std::uint8_t
292 {
293 Direct, ///< Returned address = match + disp_offset.
294 RipRelative ///< Read int32 displacement at (match + disp_offset), compute match + instr_end_offset + disp.
295 };
296
297 /**
298 * @struct AddrCandidate
299 * @brief One ordered attempt in a cascade.
300 * @details The cascade scans candidates in array order and returns the
301 * first successful resolution. @p name is echoed back in the
302 * ResolveHit on success so callers can log which candidate
303 * won -- useful when multiple patterns cover different game
304 * versions.
305 */
306 struct AddrCandidate
307 {
308 std::string_view name;
309 std::string_view pattern;
310 ResolveMode mode = ResolveMode::Direct;
311 std::ptrdiff_t disp_offset = 0;
312 std::ptrdiff_t instr_end_offset = 0;
313 };
314
315 /**
316 * @enum ResolveError
317 * @brief Reasons a cascade resolve may fail.
318 */
319 enum class ResolveError : std::uint8_t
320 {
321 EmptyCandidates,
322 NoMatch,
323 AllPatternsInvalid,
324 PrologueFallbackNotApplicable
325 };
326
327 /**
328 * @brief Human-readable mapping for ResolveError.
329 */
330 constexpr std::string_view resolve_error_to_string(ResolveError error) noexcept
331 {
332 switch (error)
333 {
334 case ResolveError::EmptyCandidates:
335 return "No candidates supplied";
336 case ResolveError::NoMatch:
337 return "No candidate pattern matched the scanned regions";
338 case ResolveError::AllPatternsInvalid:
339 return "Every candidate pattern failed to parse";
340 case ResolveError::PrologueFallbackNotApplicable:
341 return "Prologue fallback pattern too short to be unique";
342 default:
343 return "Unknown resolve error";
344 }
345 }
346
347 /**
348 * @struct ResolveHit
349 * @brief Successful cascade outcome.
350 * @details @p winning_name aliases the matching candidate's @c name
351 * field. The underlying storage must outlive the ResolveHit
352 * (AddrCandidate arrays typically live in static storage).
353 */
354 struct ResolveHit
355 {
356 std::uintptr_t address{0};
357 std::string_view winning_name;
358 };
359
360 /**
361 * @brief Try candidates in order; return the first successful address.
362 * @details Each candidate's pattern is compiled via parse_aob() and
363 * searched via scan_executable_regions(). Direct mode returns
364 * @c match + disp_offset. RipRelative mode treats @c match +
365 * disp_offset as a disp32 field and resolves against
366 * @c match + instr_end_offset. On success, the winning
367 * candidate's name is logged and returned.
368 *
369 * Logging:
370 * - Info on first success: "<label> resolved via '<name>' at 0x...".
371 * - Warning per candidate whose pattern fails to parse.
372 * - Warning on total failure.
373 *
374 * No per-candidate "miss" log line is produced, keeping
375 * chatty cascades quiet at Info level. The implementation
376 * does not log again when resolve_cascade_with_prologue_fallback()
377 * retries, so callers see exactly one info line on success.
378 *
379 * @param candidates Ordered list of candidates. Empty -> EmptyCandidates.
380 * @param label Human-readable identifier used in log messages.
381 * @return ResolveHit on success; ResolveError on failure.
382 */
383 [[nodiscard]] std::expected<ResolveHit, ResolveError>
384 resolve_cascade(std::span<const AddrCandidate> candidates, std::string_view label);
385
386 /**
387 * @brief Cascade resolver with inline-hooked-prologue recovery.
388 * @details Equivalent to resolve_cascade() on the happy path. If every
389 * candidate fails, rebuilds each Direct-mode candidate's
390 * pattern with the first 5 bytes replaced by
391 * `E9 ?? ?? ?? ??` (the near-JMP signature that SafetyHook
392 * and MinHook write when another mod already hooked the
393 * target) and retries. If the recovery path succeeds the
394 * log line calls this out explicitly.
395 *
396 * RipRelative candidates are skipped in the fallback phase
397 * since they target instructions deeper than the 5-byte
398 * prologue and are unaffected by the overwrite.
399 *
400 * @param candidates Ordered candidates.
401 * @param label Human-readable identifier used in log messages.
402 * @return ResolveHit on success; ResolveError on failure.
403 */
404 [[nodiscard]] std::expected<ResolveHit, ResolveError>
405 resolve_cascade_with_prologue_fallback(std::span<const AddrCandidate> candidates,
406 std::string_view label);
407
408 /**
409 * @brief Cheap heuristic: does @p addr look like the first byte of a
410 * real function body?
411 * @details Reads exactly one byte from @p addr behind a Memory::is_readable()
412 * gate and rejects a small blacklist of bytes that are never the
413 * first opcode of a callable x86-64 function:
414 *
415 * - 0x00 uninitialised page / zero-fill BSS / NULL page
416 * - 0xCC int3 breakpoint / alignment pad / debugger trap
417 * - 0xC2 0xC3 bare RET (stub, not a callable body)
418 *
419 * Returns true for every other byte, including 0xE9 / 0xEB /
420 * the 0xFF 0x25 prefix of an indirect JMP, so a target whose
421 * prologue has already been overwritten by SafetyHook or MinHook
422 * still passes -- the resolver must succeed for nested-hook
423 * scenarios.
424 *
425 * This is the negative complement to
426 * resolve_cascade_with_prologue_fallback(), which is a positive
427 * recovery (rebuild the hooked-prologue pattern and retry). Both
428 * can be used together: the cascade resolves the target, then
429 * this helper filters scan poison if the AOB happened to land on
430 * a zero page or an alignment pad.
431 *
432 * @param addr Absolute address to probe. @p addr == 0 returns false
433 * without reading memory. An unreadable address returns
434 * false (the byte could not be read, so the answer is
435 * "not a prologue").
436 * @return true if the byte at @p addr is not on the poison list and was
437 * readable; false otherwise.
438 */
439 [[nodiscard]] bool is_likely_function_prologue(std::uintptr_t addr) noexcept;
440
441 /**
442 * @enum SimdLevel
443 * @brief Reports the highest SIMD tier available for pattern verification.
444 */
445 enum class SimdLevel
446 {
447 Scalar, ///< No SIMD (byte-by-byte verification)
448 Sse2, ///< SSE2 (16 bytes per iteration)
449 Avx2 ///< AVX2 (32 bytes per iteration, with SSE2 + scalar tail)
450 };
451
452 /**
453 * @brief Returns the SIMD tier that find_pattern() will use at runtime.
454 * @details Reflects both compile-time support (intrinsics available) and
455 * runtime CPU detection (CPUID + OS XGETBV for AVX2).
456 */
457 [[nodiscard]] SimdLevel active_simd_level() noexcept;
458
459 } // namespace Scanner
460 } // namespace DetourModKit
461
462 #endif // DETOURMODKIT_SCANNER_HPP
463