8#ifndef NO_MULTITHREADING
12uint32_t& get_num_cores_ref()
14 static thread_local const char* val =
std::getenv(
"HARDWARE_CONCURRENCY");
15 static thread_local uint32_t cores =
26#ifdef NO_MULTITHREADING
27 throw_or_abort(
"Cannot set hardware concurrency when multithreading is disabled.");
29 get_num_cores_ref() =
static_cast<uint32_t
>(num_cores);
35#ifdef NO_MULTITHREADING
38 return static_cast<size_t>(get_num_cores_ref());
111void parallel_for(
size_t num_iterations,
const std::function<
void(
size_t)>& func)
113#ifdef NO_MULTITHREADING
114 for (
size_t i = 0; i < num_iterations; ++i) {
118#ifdef OMP_MULTITHREADING
142 const std::function<
void(
size_t,
size_t)>& func,
143 size_t no_multhreading_if_less_or_equal)
145 if (num_points <= no_multhreading_if_less_or_equal) {
153 const size_t chunk_size = (num_points / num_cpus) + (num_points % num_cpus == 0 ? 0 : 1);
155 parallel_for(num_cpus, [num_points, chunk_size, &func](
size_t chunk_index) {
157 if (chunk_size * chunk_index > num_points) {
161 size_t current_chunk_size = std::min(num_points - (chunk_size * chunk_index), chunk_size);
162 if (current_chunk_size == 0) {
165 size_t start = chunk_index * chunk_size;
166 size_t end = chunk_index * chunk_size + current_chunk_size;
172 const std::function<
void(
size_t,
size_t,
size_t)>& func,
173 size_t heuristic_cost)
179 constexpr size_t PARALLEL_FOR_COST = 400000;
184 const size_t chunk_size = (num_points / num_cpus) + (num_points % num_cpus == 0 ? 0 : 1);
187 const size_t offset_cost = (num_points - chunk_size) * heuristic_cost;
190 if (offset_cost < PARALLEL_FOR_COST) {
191 func(0, num_points, 0);
195 parallel_for(num_cpus, [num_points, chunk_size, &func](
size_t chunk_index) {
197 if (chunk_size * chunk_index > num_points) {
201 size_t current_chunk_size = std::min(num_points - (chunk_size * chunk_index), chunk_size);
202 if (current_chunk_size == 0) {
205 size_t start = chunk_index * chunk_size;
206 size_t end = chunk_index * chunk_size + current_chunk_size;
208 func(start, end, chunk_index);
215 const size_t thread_size = num_iterations / num_threads;
218 std::vector<size_t> start(num_threads);
219 std::vector<size_t> end(num_threads);
220 for (
size_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
221 start[thread_idx] = thread_idx * thread_size;
222 end[thread_idx] = (thread_idx == num_threads - 1) ? num_iterations : (thread_idx + 1) * thread_size;
241 size_t desired_num_threads = num_iterations / min_iterations_per_thread;
242 size_t num_threads = std::min(desired_num_threads, max_num_threads);
243 num_threads = num_threads > 0 ? num_threads : 1;
257 size_t desired_num_threads = num_iterations / min_iterations_per_thread;
258 desired_num_threads =
static_cast<size_t>(1ULL <<
numeric::get_msb(desired_num_threads));
259 size_t num_threads = std::min(desired_num_threads, max_num_threads);
260 num_threads = num_threads > 0 ? num_threads : 1;
uint32_t env_hardware_concurrency()
constexpr T get_msb(const T in)
Entry point for Barretenberg command-line interface.
void parallel_for_mutex_pool(size_t num_iterations, const std::function< void(size_t)> &func)
MultithreadData calculate_thread_data(size_t num_iterations, size_t min_iterations_per_thread)
Calculates number of threads and index bounds for each thread.
void parallel_for_queued(size_t num_iterations, const std::function< void(size_t)> &func)
size_t get_num_cpus_pow2()
void parallel_for_moody(size_t num_iterations, const std::function< void(size_t)> &func)
size_t calculate_num_threads(size_t num_iterations, size_t min_iterations_per_thread)
calculates number of threads to create based on minimum iterations per thread
size_t calculate_num_threads_pow2(size_t num_iterations, size_t min_iterations_per_thread)
calculates number of threads to create based on minimum iterations per thread, guaranteed power of 2
void parallel_for_atomic_pool(size_t num_iterations, const std::function< void(size_t)> &func)
void parallel_for_heuristic(size_t num_points, const std::function< void(size_t, size_t, size_t)> &func, size_t heuristic_cost)
Split a loop into several loops running in parallel based on operations in 1 iteration.
void parallel_for_spawning(size_t num_iterations, const std::function< void(size_t)> &func)
void set_parallel_for_concurrency(size_t num_cores)
void parallel_for(size_t num_iterations, const std::function< void(size_t)> &func)
void parallel_for_omp(size_t num_iterations, const std::function< void(size_t)> &func)
void parallel_for_range(size_t num_points, const std::function< void(size_t, size_t)> &func, size_t no_multhreading_if_less_or_equal)
Split a loop into several loops running in parallel.
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
void throw_or_abort(std::string const &err)