Barretenberg
The ZK-SNARK library at the core of Aztec
Loading...
Searching...
No Matches
bb_bench.hpp
Go to the documentation of this file.
1
2#pragma once
3
5#include <iostream>
6#include <map>
7#include <memory>
8#include <ostream>
9#include <string_view>
10#include <tracy/Tracy.hpp>
11#include <unordered_map>
12#include <vector>
13
19namespace bb::detail {
20// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
21extern bool use_bb_bench;
22
// Compile-time string
// See e.g. https://www.reddit.com/r/cpp_questions/comments/pumi9r/does_c20_not_support_string_literals_as_template/
//
// Fixed-size character buffer that copies a string literal (including its terminating '\0') so the text can be
// carried around as a value. N is the size of the source array, i.e. the literal's length plus one for the
// terminator.
template <std::size_t N> struct OperationLabel {
    // Number of stored characters, terminator included.
    constexpr static std::size_t size() { return N; }

    // Copies all N characters of `str` into `value`.
    // Deliberately not `explicit`: the label is meant to be created implicitly from a string literal.
    // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays)
    constexpr OperationLabel(const char (&str)[N])
    {
        for (std::size_t i = 0; i < N; ++i) {
            value[i] = str[i];
        }
    }

    // Public storage for the copied characters. Value-initialised so that every element is initialised before the
    // constructor body runs.
    // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays)
    char value[N] = {};
};
38
39template <OperationLabel op1, OperationLabel op2> constexpr auto concat()
40{
41 // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays)
42 char result_cstr[op1.size() + op2.size() - 1] = {};
43 std::copy(op1.value, op1.value + op1.size() - 1, result_cstr);
44 std::copy(op2.value, op2.value + op2.size(), result_cstr + op1.size() - 1);
45 return OperationLabel{ result_cstr };
46}
47struct TimeStats;
48struct TimeStatsEntry;
49using OperationKey = std::string_view;
50
52 uint64_t time = 0;
53 uint64_t count = 0;
54};
55
56// Normalized benchmark entry - each represents a unique (function, parent) pair
58 // For convenience, even though redundant with map store
63 size_t num_threads = 0;
64 double time_mean = 0;
66 double time_stddev = 0;
67
68 // Welford's algorithm state
69 double time_m2 = 0; // sum of squared differences from mean
70
71 void add_thread_time_sample(const TimeAndCount& stats);
72 double get_std_dev() const;
73};
74
75// AggregateData: Result of normalizing benchmark data
76// entries: Key -> ParentKey -> Entry
77// Empty string is used as key if the entry has no parent.
79
80// Contains all statically known op counts
82 public:
83 static inline thread_local TimeStatsEntry* parent = nullptr;
85 std::mutex mutex;
87 void print() const;
88 // NOTE: Should be called when other threads aren't active
89 void clear();
90 void add_entry(const char* key, const std::shared_ptr<TimeStatsEntry>& entry);
91 void print_stats_recursive(const OperationKey& key, const TimeStats* stats, const std::string& indent) const;
92 void print_aggregate_counts(std::ostream&, size_t) const;
93 void print_aggregate_counts_hierarchical(std::ostream&) const;
94
95 // Normalize the raw benchmark data into a clean structure for display
97};
98
99// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
101
102// Tracks operation statistics and links them to their immediate parent context.
103// Each stat is associated only with its direct parent, not the full call hierarchy.
104// This allows measuring the direct contribution of nested operations to their parent,
105// but doesn't provide recursive parent-child relationships through the entire call stack.
106struct TimeStats {
110 // Used if the parent changes from last call - chains to handle multiple parent contexts
112
113 TimeStats() = default;
114 TimeStats(TimeStatsEntry* parent_ptr, std::size_t count_val, std::size_t time_val)
115 : parent(parent_ptr)
116 , count(count_val)
117 , time(time_val)
118 {}
119
120 void track(TimeStatsEntry* current_parent, std::size_t time_val)
121 {
122 // Try to track with current stats if parent matches
123 // Check if 'next' already handles this parent to avoid creating duplicates
124 if (raw_track(current_parent, time_val) || (next && next->raw_track(current_parent, time_val))) {
125 return;
126 }
127 // Create new TimeStats at the front of this linked list.
128 auto new_next = std::make_unique<TimeStats>(parent, count, time);
129 new_next->next = std::move(next);
130 next = std::move(new_next);
131
132 // Reset this node.
133 parent = current_parent;
134 count = 1;
135 time = time_val;
136 }
137
138 private:
139 // Returns true if successfully tracked (parent matches), false otherwise
140 bool raw_track(TimeStatsEntry* expected_parent, std::size_t time_val)
141 {
142 if (parent != expected_parent) {
143 return false;
144 }
145 count++;
146 time += time_val;
147 return true;
148 }
149};
150
151// Each key will appear at most once *per thread*.
152// Each thread has its own count for thread-safety.
157
158// The stat entry associated with a certain label AND a certain thread.
159// These will later be aggregated, and the TimeStats itself contains stat
160// entries for each caller context change (for later summarization).
161template <OperationLabel Op> struct ThreadBenchStats {
162 public:
163 // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
164 static inline thread_local std::shared_ptr<TimeStatsEntry> stats;
165
166 static void init_entry(TimeStatsEntry& entry);
167 // returns null if use_bb_bench not enabled
176};
177
178// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions)
179// no-op if passed null stats
187} // namespace bb::detail
188
// Define macros. We use (void)0 for the empty ones because we want every macro to be a statement that needs a
// trailing semicolon.
//
// Three configurations are selected below:
//   - TRACY_INSTRUMENTED: the *_TRACY macros open Tracy zones; the bb_bench-only macros are no-ops.
//   - __wasm__:           every macro is a no-op.
//   - otherwise:          the BB_BENCH_* macros create a BenchReporter named `_bb_bench_reporter` in the current
//                         scope; the plain BB_TRACY macros are no-ops.
#ifdef TRACY_INSTRUMENTED
#define BB_TRACY() ZoneScopedN(__func__)
#define BB_TRACY_NAME(name) ZoneScopedN(name)
#define BB_BENCH_TRACY() ZoneScopedN(__func__)
#define BB_BENCH_TRACY_NAME(name) ZoneScopedN(name)
#define BB_BENCH_ONLY_NAME(name) (void)0
#define BB_BENCH_ENABLE_NESTING() (void)0
#define BB_BENCH_ONLY() (void)0
#elif defined __wasm__
#define BB_TRACY() (void)0
#define BB_TRACY_NAME(name) (void)0
#define BB_BENCH_TRACY() (void)0
#define BB_BENCH_TRACY_NAME(name) (void)0
#define BB_BENCH_ONLY_NAME(name) (void)0
#define BB_BENCH_ENABLE_NESTING() (void)0
#define BB_BENCH_ONLY() (void)0
#else
#define BB_TRACY() (void)0
#define BB_TRACY_NAME(name) (void)0
#define BB_BENCH_TRACY() BB_BENCH_ONLY_NAME(__func__)
#define BB_BENCH_TRACY_NAME(name) BB_BENCH_ONLY_NAME(name)
// Declares the scope-local reporter that the nesting macro below refers to.
#define BB_BENCH_ONLY_NAME(name)                                                                                       \
    bb::detail::BenchReporter _bb_bench_reporter((bb::detail::ThreadBenchStats<name>::ensure_stats().get()))
// Makes the reporter's stats entry the current parent, but only when the reporter actually holds one.
// Written as a single void expression rather than a bare `if` so that it cannot capture a following `else`
// when used inside an unbraced if/else.
#define BB_BENCH_ENABLE_NESTING()                                                                                      \
    ((_bb_bench_reporter.stats != nullptr)                                                                             \
         ? static_cast<void>(bb::detail::GlobalBenchStatsContainer::parent = _bb_bench_reporter.stats)                 \
         : static_cast<void>(0))
#define BB_BENCH_ONLY() BB_BENCH_ONLY_NAME(__func__)
#endif
// NOTE: the two macros below expand to two statements (the first may be a declaration), so they must be used
// directly inside a braced scope, not as the body of an unbraced if/for/while.
#define BB_BENCH_NAME(name)                                                                                            \
    BB_BENCH_TRACY_NAME(name);                                                                                         \
    BB_BENCH_ENABLE_NESTING()

#define BB_BENCH()                                                                                                     \
    BB_BENCH_TRACY();                                                                                                  \
    BB_BENCH_ENABLE_NESTING()
#define BB_UNLIKELY(x)
GlobalBenchStatsContainer GLOBAL_BENCH_STATS
Definition bb_bench.cpp:569
constexpr auto concat()
Definition bb_bench.hpp:39
std::unordered_map< OperationKey, std::map< OperationKey, AggregateEntry > > AggregateData
Definition bb_bench.hpp:78
bool use_bb_bench
Definition bb_bench.cpp:172
std::string_view OperationKey
Definition bb_bench.cpp:174
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
Definition tuple.hpp:13
Definition bb_bench.hpp:57
std::size_t count
Definition bb_bench.hpp:62
void add_thread_time_sample(const TimeAndCount &stats)
Definition bb_bench.cpp:176
double time_stddev
Definition bb_bench.hpp:66
double time_m2
Definition bb_bench.hpp:69
double time_mean
Definition bb_bench.hpp:64
OperationKey parent
Definition bb_bench.hpp:60
std::size_t time
Definition bb_bench.hpp:61
size_t num_threads
Definition bb_bench.hpp:63
double get_std_dev() const
Definition bb_bench.cpp:193
OperationKey key
Definition bb_bench.hpp:59
std::size_t time_max
Definition bb_bench.hpp:65
TimeStatsEntry * parent
Definition bb_bench.hpp:181
TimeStatsEntry * stats
Definition bb_bench.hpp:182
void print_stats_recursive(const OperationKey &key, const TimeStats *stats, const std::string &indent) const
Definition bb_bench.cpp:257
void print_aggregate_counts_hierarchical(std::ostream &) const
Definition bb_bench.cpp:299
void print_aggregate_counts(std::ostream &, size_t) const
Definition bb_bench.cpp:273
void add_entry(const char *key, const std::shared_ptr< TimeStatsEntry > &entry)
Definition bb_bench.cpp:241
std::vector< std::shared_ptr< TimeStatsEntry > > entries
Definition bb_bench.hpp:86
static thread_local TimeStatsEntry * parent
Definition bb_bench.hpp:83
static constexpr std::size_t size()
Definition bb_bench.hpp:26
constexpr OperationLabel(const char(&str)[N])
Definition bb_bench.hpp:28
static std::shared_ptr< TimeStatsEntry > ensure_stats()
Definition bb_bench.hpp:168
static void init_entry(TimeStatsEntry &entry)
static thread_local std::shared_ptr< TimeStatsEntry > stats
Definition bb_bench.hpp:164
Definition bb_bench.hpp:153
OperationKey key
Definition bb_bench.hpp:154
TimeStats count
Definition bb_bench.hpp:155
bool raw_track(TimeStatsEntry *expected_parent, std::size_t time_val)
Definition bb_bench.hpp:140
TimeStatsEntry * parent
Definition bb_bench.hpp:107
std::unique_ptr< TimeStats > next
Definition bb_bench.hpp:111
TimeStats(TimeStatsEntry *parent_ptr, std::size_t count_val, std::size_t time_val)
Definition bb_bench.hpp:114
void track(TimeStatsEntry *current_parent, std::size_t time_val)
Definition bb_bench.hpp:120