Barretenberg
The ZK-SNARK library at the core of Aztec
Loading...
Searching...
No Matches
bb_bench.cpp
Go to the documentation of this file.
2#include <cstdint>
3#include <sys/types.h>
4#ifndef __wasm__
5#include "bb_bench.hpp"
6#include <algorithm>
7#include <cassert>
8#include <chrono>
9#include <cmath>
10#include <functional>
11#include <iomanip>
12#include <iostream>
13#include <mutex>
14#include <ostream>
15#include <set>
16#include <sstream>
17#include <thread>
18#include <vector>
19
20namespace {
21// ANSI color codes
22struct Colors {
23 static constexpr const char* WHITE = "\033[37m";
24 static constexpr const char* RESET = "\033[0m";
25 static constexpr const char* BOLD = "\033[1m";
26 static constexpr const char* CYAN = "\033[36m";
27 static constexpr const char* GREEN = "\033[32m";
28 static constexpr const char* YELLOW = "\033[33m";
29 static constexpr const char* MAGENTA = "\033[35m";
30 static constexpr const char* DIM = "\033[2m";
31 static constexpr const char* RED = "\033[31m";
32};
33
34// Format time value with appropriate unit
35std::string format_time(double time_ms)
36{
37 std::ostringstream oss;
38 if (time_ms >= 1000.0) {
39 oss << std::fixed << std::setprecision(2) << (time_ms / 1000.0) << " s";
40 } else if (time_ms >= 1.0 && time_ms < 1000.0) {
41 oss << std::fixed << std::setprecision(2) << time_ms << " ms";
42 } else {
43 oss << std::fixed << std::setprecision(1) << (time_ms * 1000.0) << " μs";
44 }
45 return oss.str();
46}
47
48// Format time with fixed width for alignment
49std::string format_time_aligned(double time_ms)
50{
51 std::ostringstream oss;
52 if (time_ms >= 1000.0) {
53 std::ostringstream time_oss;
54 time_oss << std::fixed << std::setprecision(2) << (time_ms / 1000.0) << "s";
55 oss << std::left << std::setw(10) << time_oss.str();
56 } else {
57 std::ostringstream time_oss;
58 time_oss << std::fixed << std::setprecision(1) << time_ms << "ms";
59 oss << std::left << std::setw(10) << time_oss.str();
60 }
61 return oss.str();
62}
63
64// Helper to format percentage value
65std::string format_percentage_value(double percentage, const char* color)
66{
67 std::ostringstream oss;
68 oss << color << " " << std::left << std::fixed << std::setprecision(1) << std::setw(5) << percentage << "%"
69 << Colors::RESET;
70 return oss.str();
71}
72
73// Helper to format percentage with color based on percentage value
74std::string format_percentage(double value, double total, double min_threshold = 0.0)
75{
76 double percentage = (total <= 0) ? 0.0 : (value / total) * 100.0;
77 if (total <= 0 || percentage < min_threshold) {
78 return " ";
79 }
80
81 // Choose color based on percentage value (like time colors)
82 const char* color = Colors::CYAN; // Default color
83
84 return format_percentage_value(percentage, color);
85}
86
87// Helper to format percentage section
88std::string format_percentage_section(double time_ms, double parent_time, size_t indent_level)
89{
90 if (parent_time > 0 && indent_level > 0) {
91 return format_percentage(time_ms * 1000000.0, parent_time);
92 }
93 return " ";
94}
95
96// Helper to format time section
97std::string format_time_section(double time_ms)
98{
99 std::ostringstream oss;
100 oss << " ";
101 if (time_ms >= 100.0 && time_ms < 1000.0) {
102 oss << Colors::DIM << format_time_aligned(time_ms) << Colors::RESET;
103 } else {
104 oss << format_time_aligned(time_ms);
105 }
106 return oss.str();
107}
108
109// Helper to format call stats
110std::string format_call_stats(double time_ms, uint64_t count)
111{
112 if (!(time_ms >= 100.0 && count > 1)) {
113 return "";
114 }
115 double avg_ms = time_ms / static_cast<double>(count);
116 std::ostringstream oss;
117 oss << Colors::DIM << " (" << format_time(avg_ms) << " x " << count << ")" << Colors::RESET;
118 return oss.str();
119}
120
121std::string format_aligned_section(double time_ms, double parent_time, uint64_t count, size_t indent_level)
122{
123 std::ostringstream oss;
124
125 // Add indent level indicator at the beginning with different color
126 oss << Colors::MAGENTA << "[" << indent_level << "] " << Colors::RESET;
127
128 // Format percentage FIRST
129 oss << format_percentage_section(time_ms, parent_time, indent_level);
130
131 // Format time AFTER percentage with appropriate color (with more spacing)
132 oss << format_time_section(time_ms);
133
134 // Format calls/threads info - only show for >= 100ms, make it DIM
135 oss << format_call_stats(time_ms, count);
136
137 return oss.str();
138}
139
140// Get color based on time threshold
141struct TimeColor {
142 const char* name_color;
143 const char* time_color;
144};
145
146TimeColor get_time_colors(double time_ms)
147{
148 if (time_ms >= 1000.0) {
149 return { Colors::BOLD, Colors::WHITE };
150 }
151 if (time_ms >= 100.0) {
152 return { Colors::YELLOW, Colors::YELLOW };
153 }
154 return { Colors::DIM, Colors::DIM };
155}
156
157// Print separator line
158void print_separator(std::ostream& os, bool thick = true)
159{
160 const char* line = thick ? "═══════════════════════════════════════════════════════════════════════════════════════"
161 "═════════════════════"
162 : "───────────────────────────────────────────────────────────────────────────────────────"
163 "─────────────────────";
164 os << Colors::BOLD << Colors::CYAN << line << Colors::RESET << "\n";
165}
166} // anonymous namespace
167
168namespace bb::detail {
169
170// use_bb_bench is also set by --print_bench and --bench_out flags
171// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
172bool use_bb_bench = std::getenv("BB_BENCH") == nullptr ? false : std::string(std::getenv("BB_BENCH")) == "1";
173// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
174using OperationKey = std::string_view;
175
177{
178 if (stats.count == 0) {
179 return;
180 }
181 // Account for aggregate time and count
182 time += stats.time;
183 count += stats.count;
184 time_max = std::max(static_cast<size_t>(stats.time), time_max);
185 // Use Welford's method to be able to track the variance
186 num_threads++;
187 double delta = static_cast<double>(stats.time) - time_mean;
188 time_mean += delta / static_cast<double>(num_threads);
189 double delta2 = static_cast<double>(stats.time) - time_mean;
190 time_m2 += delta * delta2;
191}
192
194{
195 // Calculate standard deviation
196 if (num_threads > 1) {
197 return std::sqrt(time_m2 / static_cast<double>(num_threads - 1));
198 }
199 return 0;
200}
201
202// Normalize the raw benchmark data into a clean structure for display
204{
205 AggregateData result;
206
207 // Each count has a unique [thread, key] combo.
208 // We therefore treat each count as a thread's contribution to that key.
209 for (const std::shared_ptr<TimeStatsEntry>& entry : entries) {
210 // A map from parent key => AggregateEntry
211 auto& entry_map = result[entry->key];
212 // combine all entries with same parent key
213 std::map<OperationKey, TimeAndCount> parent_key_to_stats;
214
215 // For collection-time performance, we allow multiple stat blocks with the same parent. It'd be simpler to have
216 // one but we just have to combine them here.
217 for (const TimeStats* stats = &entry->count; stats != nullptr; stats = stats->next.get()) {
218 OperationKey parent_key = stats->parent != nullptr ? stats->parent->key : "";
219 parent_key_to_stats[parent_key].count += stats->count;
220 parent_key_to_stats[parent_key].time += stats->time;
221 }
222
223 for (auto [parent_key, stats] : parent_key_to_stats) {
224 auto& normalized_entry = entry_map[parent_key];
225 normalized_entry.key = entry->key;
226 normalized_entry.parent = parent_key;
227 normalized_entry.add_thread_time_sample(stats);
228 }
229 }
230
231 return result;
232}
233
240
242{
244 entry->key = key;
245 entries.push_back(entry);
246}
247
249{
250 std::cout << "GlobalBenchStatsContainer::print() START" << "\n";
251 for (const std::shared_ptr<TimeStatsEntry>& entry : entries) {
252 print_stats_recursive(entry->key, &entry->count, "");
253 }
254 std::cout << "GlobalBenchStatsContainer::print() END" << "\n";
255}
256
258 const TimeStats* stats,
259 const std::string& indent) const
260{
261 if (stats->count > 0) {
262 std::cout << indent << key << "\t" << stats->count << "\n";
263 }
264 if (stats->time > 0) {
265 std::cout << indent << key << "(t)\t" << static_cast<double>(stats->time) / 1000000.0 << "ms\n";
266 }
267
268 if (stats->next != nullptr) {
269 print_stats_recursive(key, stats->next.get(), indent + " ");
270 }
271}
272
273void GlobalBenchStatsContainer::print_aggregate_counts(std::ostream& os, size_t indent) const
274{
275 os << '{';
276 bool first = true;
277 for (const auto& [key, entry_map] : aggregate()) {
278 // Loop for a flattened view
279 uint64_t time = 0;
280 for (auto& [parent_key, entry] : entry_map) {
281 time += entry.time_max;
282 }
283
284 if (!first) {
285 os << ',';
286 }
287 if (indent > 0) {
288 os << "\n" << std::string(indent, ' ');
289 }
290 os << '"' << key << "\":" << time;
291 first = false;
292 }
293 if (indent > 0) {
294 os << "\n";
295 }
296 os << '}' << "\n";
297}
298
300{
301 AggregateData aggregated = aggregate();
302
303 if (aggregated.empty()) {
304 os << "No benchmark data collected\n";
305 return;
306 }
307
308 // Print header
309 os << "\n";
310 print_separator(os, true);
311 os << Colors::BOLD << " Benchmark Results" << Colors::RESET << "\n";
312 print_separator(os, true);
313
315 std::set<OperationKey> printed_in_detail;
316 for (auto& [key, entry_map] : aggregated) {
317 for (auto& [parent_key, entry] : entry_map) {
318 if (entry.count > 0) {
319 keys_to_parents[key].insert(parent_key);
320 }
321 }
322 }
323
324 // Helper function to print a stat line with tree drawing
325 auto print_entry = [&](const AggregateEntry& entry, size_t indent_level, bool is_last, uint64_t parent_time) {
326 std::string indent(indent_level * 2, ' ');
327 std::string prefix = (indent_level == 0) ? "" : (is_last ? "└─ " : "├─ ");
328
329 // Use exactly 80 characters for function name without indent
330 const size_t name_width = 80;
331 std::string display_name = std::string(entry.key);
332 if (display_name.length() > name_width) {
333 display_name = display_name.substr(0, name_width - 3) + "...";
334 }
335
336 double time_ms = static_cast<double>(entry.time_max) / 1000000.0;
337 auto colors = get_time_colors(time_ms);
338
339 // Print indent + prefix + name (exactly 80 chars) + time/percentage/calls
340 os << indent << prefix << colors.name_color;
341 if (time_ms >= 1000.0 && colors.name_color == Colors::BOLD) {
342 os << Colors::YELLOW; // Special case: bold yellow for >= 1s
343 }
344 os << std::left << std::setw(static_cast<int>(name_width)) << display_name << Colors::RESET;
345
346 // Print time if available with aligned section including indent level
347 if (entry.time_max > 0) {
348 if (time_ms < 100.0) {
349 // Minimal format for <100ms: only [level] and percentage, no time display
350 std::ostringstream minimal_oss;
351 minimal_oss << Colors::MAGENTA << "[" << indent_level << "] " << Colors::RESET;
352 minimal_oss << format_percentage_section(time_ms, static_cast<double>(parent_time), indent_level);
353 minimal_oss << " " << std::setw(10) << ""; // Add spacing to replace where time would be
354 os << " " << colors.time_color << std::setw(40) << std::left << minimal_oss.str() << Colors::RESET;
355 } else {
356 std::string aligned_section =
357 format_aligned_section(time_ms, static_cast<double>(parent_time), entry.count, indent_level);
358 os << " " << colors.time_color << std::setw(40) << std::left << aligned_section << Colors::RESET;
359 if (entry.num_threads > 1) {
360 double mean_ms = entry.time_mean / 1000000.0;
361 double stddev_percentage = floor(entry.get_std_dev() * 100 / entry.time_mean);
362 os << " " << entry.num_threads << " threads " << mean_ms << "ms average " << stddev_percentage
363 << "% stddev";
364 }
365 }
366 }
367
368 os << "\n";
369 };
370
371 // Recursive function to print hierarchy
372 std::function<void(OperationKey, size_t, bool, uint64_t, OperationKey)> print_hierarchy;
373 print_hierarchy = [&](OperationKey key,
374 size_t indent_level,
375 bool is_last,
376 uint64_t parent_time,
377 OperationKey current_parent) -> void {
378 auto it = aggregated.find(key);
379 if (it == aggregated.end()) {
380 return;
381 }
382
383 // Find the entry with the specific parent context
384 const AggregateEntry* entry_to_print = nullptr;
385 for (const auto& [parent_key, entry] : it->second) {
386 if ((indent_level == 0 && parent_key.empty()) || (indent_level > 0 && parent_key == current_parent)) {
387 entry_to_print = &entry;
388 break;
389 }
390 }
391
392 if (!entry_to_print) {
393 return;
394 }
395
396 // Print this entry
397 print_entry(*entry_to_print, indent_level, is_last, parent_time);
398
399 // Find and print children - operations that have this key as parent (only those with meaningful time >= 0.5ms)
401 if (!printed_in_detail.contains(key)) {
402 for (const auto& [child_key, parent_map] : aggregated) {
403 for (const auto& [parent_key, entry] : parent_map) {
404 if (parent_key == key && entry.time_max >= 500000) { // 0.5ms in nanoseconds
405 children.push_back(child_key);
406 break;
407 }
408 }
409 }
410 printed_in_detail.insert(key);
411 }
412
413 // Sort children by their time in THIS parent context
414 std::ranges::sort(children, [&](OperationKey a, OperationKey b) {
415 uint64_t time_a = 0;
416 uint64_t time_b = 0;
417 if (auto it = aggregated.find(a); it != aggregated.end()) {
418 for (const auto& [parent_key, entry] : it->second) {
419 if (parent_key == key) {
420 time_a = entry.time_max;
421 break;
422 }
423 }
424 }
425 if (auto it = aggregated.find(b); it != aggregated.end()) {
426 for (const auto& [parent_key, entry] : it->second) {
427 if (parent_key == key) {
428 time_b = entry.time_max;
429 break;
430 }
431 }
432 }
433 return time_a > time_b;
434 });
435
436 // Calculate time spent in children and add "(other)" if >5% unaccounted
437 uint64_t children_total_time = 0;
438 for (const auto& child_key : children) {
439 if (auto it = aggregated.find(child_key); it != aggregated.end()) {
440 for (const auto& [parent_key, entry] : it->second) {
441 if (parent_key == key && entry.time_max >= 500000) { // 0.5ms in nanoseconds
442 children_total_time += entry.time_max;
443 }
444 }
445 }
446 }
447 uint64_t parent_total_time = entry_to_print->time_max;
448 bool should_add_other = false;
449 if (!children.empty() && parent_total_time > 0 && children_total_time < parent_total_time) {
450 uint64_t unaccounted = parent_total_time - children_total_time;
451 double percentage = (static_cast<double>(unaccounted) / static_cast<double>(parent_total_time)) * 100.0;
452 should_add_other = percentage > 5.0 && unaccounted > 0;
453 }
454 uint64_t other_time = should_add_other ? (parent_total_time - children_total_time) : 0;
455
456 if (!children.empty() && keys_to_parents[key].size() > 1) {
457 os << std::string(indent_level * 2, ' ') << " ├─ NOTE: Shared children. Can add up to > 100%.\n";
458 }
459
460 // Print children
461 for (size_t i = 0; i < children.size(); ++i) {
462 bool is_last_child = (i == children.size() - 1) && !should_add_other;
463 print_hierarchy(children[i], indent_level + 1, is_last_child, entry_to_print->time, key);
464 }
465
466 // Print "(other)" category if significant unaccounted time exists
467 if (should_add_other && keys_to_parents[key].size() <= 1) {
468 AggregateEntry other_entry;
469 other_entry.key = "(other)";
470 other_entry.time = other_time;
471 other_entry.time_max = other_time;
472 other_entry.count = 1;
473 other_entry.num_threads = 1;
474 print_entry(other_entry, indent_level + 1, true, parent_total_time); // always last
475 }
476 };
477
478 // Find root entries (those that ONLY have empty parent key and significant time)
480 for (const auto& [key, parent_map] : aggregated) {
481 auto empty_parent_it = parent_map.find("");
482 if (empty_parent_it != parent_map.end() && empty_parent_it->second.time > 0) {
483 roots.push_back(key);
484 }
485 }
486
487 // Sort roots by time (descending)
488 std::ranges::sort(roots, [&](OperationKey a, OperationKey b) {
489 uint64_t time_a = 0;
490 uint64_t time_b = 0;
491 if (auto it_a = aggregated.find(a); it_a != aggregated.end()) {
492 if (auto parent_it = it_a->second.find(""); parent_it != it_a->second.end()) {
493 time_a = parent_it->second.time_max;
494 }
495 }
496 if (auto it_b = aggregated.find(b); it_b != aggregated.end()) {
497 if (auto parent_it = it_b->second.find(""); parent_it != it_b->second.end()) {
498 time_b = parent_it->second.time_max;
499 }
500 }
501 return time_a > time_b;
502 });
503
504 // Print hierarchies starting from roots
505 for (size_t i = 0; i < roots.size(); ++i) {
506 print_hierarchy(roots[i], 0, i == roots.size() - 1, 0, "");
507 }
508
509 // Print summary
510 print_separator(os, false);
511
512 // Calculate totals from root entries
513 std::set<OperationKey> unique_funcs;
514 for (const auto& [key, _] : aggregated) {
515 unique_funcs.insert(key);
516 }
517 size_t unique_functions_count = unique_funcs.size();
518
519 uint64_t shared_count = 0;
520 for (const auto& [key, parents] : keys_to_parents) {
521 if (parents.size() > 1) {
522 shared_count++;
523 }
524 }
525
526 uint64_t total_time = 0;
527 for (const auto& [_, parent_map] : aggregated) {
528 if (auto it = parent_map.find(""); it != parent_map.end()) {
529 total_time = std::max(static_cast<size_t>(total_time), it->second.time_max);
530 }
531 }
532
533 uint64_t total_calls = 0;
534 for (const auto& [_, parent_map] : aggregated) {
535 for (const auto& [__, entry] : parent_map) {
536 total_calls += entry.count;
537 }
538 }
539
540 double total_time_ms = static_cast<double>(total_time) / 1000000.0;
541
542 os << " " << Colors::BOLD << "Total: " << Colors::RESET << Colors::MAGENTA << unique_functions_count
543 << " functions" << Colors::RESET;
544 if (shared_count > 0) {
545 os << " (" << Colors::RED << shared_count << " shared" << Colors::RESET << ")";
546 }
547 os << ", " << Colors::GREEN << total_calls << " measurements" << Colors::RESET << ", " << Colors::YELLOW;
548 if (total_time_ms >= 1000.0) {
549 os << std::fixed << std::setprecision(2) << (total_time_ms / 1000.0) << " seconds";
550 } else {
551 os << std::fixed << std::setprecision(2) << total_time_ms << " ms";
552 }
553 os << Colors::RESET;
554
555 os << "\n";
556 print_separator(os, true);
557 os << "\n";
558}
559
561{
564 entry->count = TimeStats();
565 }
566}
567
568// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
570
572 : parent(nullptr)
573 , stats(entry)
574 , time(0)
575{
576 if (stats == nullptr) {
577 return;
578 }
579 // Track the current parent context
581 auto now = std::chrono::high_resolution_clock::now();
582 auto now_ns = std::chrono::time_point_cast<std::chrono::nanoseconds>(now);
583 time = static_cast<uint64_t>(now_ns.time_since_epoch().count());
584}
586{
587 if (stats == nullptr) {
588 return;
589 }
590 auto now = std::chrono::high_resolution_clock::now();
591 auto now_ns = std::chrono::time_point_cast<std::chrono::nanoseconds>(now);
592 // Add, taking advantage of our parent context
593 stats->count.track(parent, static_cast<uint64_t>(now_ns.time_since_epoch().count()) - time);
594
595 // Unwind to previous parent
597}
598} // namespace bb::detail
599#endif
FF a
FF b
GlobalBenchStatsContainer GLOBAL_BENCH_STATS
Definition bb_bench.cpp:569
std::unordered_map< OperationKey, std::map< OperationKey, AggregateEntry > > AggregateData
Definition bb_bench.hpp:78
bool use_bb_bench
Definition bb_bench.cpp:172
std::string_view OperationKey
Definition bb_bench.cpp:174
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
Definition tuple.hpp:13
Definition bb_bench.hpp:57
std::size_t count
Definition bb_bench.hpp:62
void add_thread_time_sample(const TimeAndCount &stats)
Definition bb_bench.cpp:176
double time_m2
Definition bb_bench.hpp:69
double time_mean
Definition bb_bench.hpp:64
std::size_t time
Definition bb_bench.hpp:61
size_t num_threads
Definition bb_bench.hpp:63
double get_std_dev() const
Definition bb_bench.cpp:193
OperationKey key
Definition bb_bench.hpp:59
std::size_t time_max
Definition bb_bench.hpp:65
BenchReporter(TimeStatsEntry *entry)
Definition bb_bench.cpp:571
TimeStatsEntry * parent
Definition bb_bench.hpp:181
TimeStatsEntry * stats
Definition bb_bench.hpp:182
void print_stats_recursive(const OperationKey &key, const TimeStats *stats, const std::string &indent) const
Definition bb_bench.cpp:257
void print_aggregate_counts_hierarchical(std::ostream &) const
Definition bb_bench.cpp:299
void print_aggregate_counts(std::ostream &, size_t) const
Definition bb_bench.cpp:273
void add_entry(const char *key, const std::shared_ptr< TimeStatsEntry > &entry)
Definition bb_bench.cpp:241
std::vector< std::shared_ptr< TimeStatsEntry > > entries
Definition bb_bench.hpp:86
static thread_local TimeStatsEntry * parent
Definition bb_bench.hpp:83
Definition bb_bench.hpp:153
TimeStats count
Definition bb_bench.hpp:155
std::unique_ptr< TimeStats > next
Definition bb_bench.hpp:111
void track(TimeStatsEntry *current_parent, std::size_t time_val)
Definition bb_bench.hpp:120
#define RED
#define RESET