Apply clang-format

This commit is contained in:
Tobias Wood
2023-11-29 11:12:48 +00:00
parent 9ea520fc45
commit f38e16c193
534 changed files with 103368 additions and 116934 deletions

View File

@@ -53,8 +53,7 @@ class EventCount {
public:
class Waiter;
EventCount(MaxSizeVector<Waiter>& waiters)
: state_(kStackMask), waiters_(waiters) {
EventCount(MaxSizeVector<Waiter>& waiters) : state_(kStackMask), waiters_(waiters) {
eigen_plain_assert(waiters.size() < (1 << kWaiterBits) - 1);
}
@@ -72,9 +71,7 @@ class EventCount {
CheckState(state);
uint64_t newstate = state + kWaiterInc;
CheckState(newstate);
if (state_.compare_exchange_weak(state, newstate,
std::memory_order_seq_cst))
return;
if (state_.compare_exchange_weak(state, newstate, std::memory_order_seq_cst)) return;
}
}
@@ -93,12 +90,10 @@ class EventCount {
} else {
// Remove this thread from pre-wait counter and add to the waiter stack.
newstate = ((state & kWaiterMask) - kWaiterInc) | me;
w->next.store(state & (kStackMask | kEpochMask),
std::memory_order_relaxed);
w->next.store(state & (kStackMask | kEpochMask), std::memory_order_relaxed);
}
CheckState(newstate);
if (state_.compare_exchange_weak(state, newstate,
std::memory_order_acq_rel)) {
if (state_.compare_exchange_weak(state, newstate, std::memory_order_acq_rel)) {
if ((state & kSignalMask) == 0) {
w->epoch += kEpochInc;
Park(w);
@@ -118,13 +113,9 @@ class EventCount {
// so we should not consume a signal unconditionally.
// Only if number of waiters is equal to number of signals,
// we know that the thread was notified and we must take away the signal.
if (((state & kWaiterMask) >> kWaiterShift) ==
((state & kSignalMask) >> kSignalShift))
newstate -= kSignalInc;
if (((state & kWaiterMask) >> kWaiterShift) == ((state & kSignalMask) >> kSignalShift)) newstate -= kSignalInc;
CheckState(newstate);
if (state_.compare_exchange_weak(state, newstate,
std::memory_order_acq_rel))
return;
if (state_.compare_exchange_weak(state, newstate, std::memory_order_acq_rel)) return;
}
}
@@ -142,8 +133,7 @@ class EventCount {
uint64_t newstate;
if (notifyAll) {
// Empty wait stack and set signal to number of pre-wait threads.
newstate =
(state & kWaiterMask) | (waiters << kSignalShift) | kStackMask;
newstate = (state & kWaiterMask) | (waiters << kSignalShift) | kStackMask;
} else if (signals < waiters) {
// There is a thread in pre-wait state, unblock it.
newstate = state + kSignalInc;
@@ -154,10 +144,8 @@ class EventCount {
newstate = (state & (kWaiterMask | kSignalMask)) | next;
}
CheckState(newstate);
if (state_.compare_exchange_weak(state, newstate,
std::memory_order_acq_rel)) {
if (!notifyAll && (signals < waiters))
return; // unblocked pre-wait thread
if (state_.compare_exchange_weak(state, newstate, std::memory_order_acq_rel)) {
if (!notifyAll && (signals < waiters)) return; // unblocked pre-wait thread
if ((state & kStackMask) == kStackMask) return;
Waiter* w = &waiters_[state & kStackMask];
if (!notifyAll) w->next.store(kStackMask, std::memory_order_relaxed);
@@ -195,12 +183,10 @@ class EventCount {
static const uint64_t kWaiterBits = 14;
static const uint64_t kStackMask = (1ull << kWaiterBits) - 1;
static const uint64_t kWaiterShift = kWaiterBits;
static const uint64_t kWaiterMask = ((1ull << kWaiterBits) - 1)
<< kWaiterShift;
static const uint64_t kWaiterMask = ((1ull << kWaiterBits) - 1) << kWaiterShift;
static const uint64_t kWaiterInc = 1ull << kWaiterShift;
static const uint64_t kSignalShift = 2 * kWaiterBits;
static const uint64_t kSignalMask = ((1ull << kWaiterBits) - 1)
<< kSignalShift;
static const uint64_t kSignalMask = ((1ull << kWaiterBits) - 1) << kSignalShift;
static const uint64_t kSignalInc = 1ull << kSignalShift;
static const uint64_t kEpochShift = 3 * kWaiterBits;
static const uint64_t kEpochBits = 64 - kEpochShift;

View File

@@ -1,3 +1,4 @@
#ifndef EIGEN_THREADPOOL_MODULE_H
#error "Please include unsupported/Eigen/CXX11/ThreadPool instead of including headers inside the src directory directly."
#error \
"Please include unsupported/Eigen/CXX11/ThreadPool instead of including headers inside the src directory directly."
#endif

View File

@@ -21,11 +21,9 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
typedef typename Environment::Task Task;
typedef RunQueue<Task, 1024> Queue;
ThreadPoolTempl(int num_threads, Environment env = Environment())
: ThreadPoolTempl(num_threads, true, env) {}
ThreadPoolTempl(int num_threads, Environment env = Environment()) : ThreadPoolTempl(num_threads, true, env) {}
ThreadPoolTempl(int num_threads, bool allow_spinning,
Environment env = Environment())
ThreadPoolTempl(int num_threads, bool allow_spinning, Environment env = Environment())
: env_(env),
num_threads_(num_threads),
allow_spinning_(allow_spinning),
@@ -57,8 +55,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
thread_data_.resize(num_threads_);
for (int i = 0; i < num_threads_; i++) {
SetStealPartition(i, EncodePartition(0, num_threads_));
thread_data_[i].thread.reset(
env_.CreateThread([this, i]() { WorkerLoop(i); }));
thread_data_[i].thread.reset(env_.CreateThread([this, i]() { WorkerLoop(i); }));
}
#ifndef EIGEN_THREAD_LOCAL
// Wait for workers to initialize per_thread_map_. Otherwise we might race
@@ -84,8 +81,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
}
// Join threads explicitly (by destroying) to avoid destruction order within
// this class.
for (size_t i = 0; i < thread_data_.size(); ++i)
thread_data_[i].thread.reset();
for (size_t i = 0; i < thread_data_.size(); ++i) thread_data_[i].thread.reset();
}
void SetStealPartitions(const std::vector<std::pair<unsigned, unsigned>>& partitions) {
@@ -101,12 +97,9 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
}
}
void Schedule(std::function<void()> fn) EIGEN_OVERRIDE {
ScheduleWithHint(std::move(fn), 0, num_threads_);
}
void Schedule(std::function<void()> fn) EIGEN_OVERRIDE { ScheduleWithHint(std::move(fn), 0, num_threads_); }
void ScheduleWithHint(std::function<void()> fn, int start,
int limit) override {
void ScheduleWithHint(std::function<void()> fn, int start, int limit) override {
Task t = env_.CreateTask(std::move(fn));
PerThread* pt = GetPerThread();
if (pt->pool == this) {
@@ -175,9 +168,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
static const int kMaxPartitionBits = 16;
static const int kMaxThreads = 1 << kMaxPartitionBits;
inline unsigned EncodePartition(unsigned start, unsigned limit) {
return (start << kMaxPartitionBits) | limit;
}
inline unsigned EncodePartition(unsigned start, unsigned limit) { return (start << kMaxPartitionBits) | limit; }
inline void DecodePartition(unsigned val, unsigned* start, unsigned* limit) {
*limit = val & (kMaxThreads - 1);
@@ -195,9 +186,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
thread_data_[i].steal_partition.store(val, std::memory_order_relaxed);
}
inline unsigned GetStealPartition(int i) {
return thread_data_[i].steal_partition.load(std::memory_order_relaxed);
}
inline unsigned GetStealPartition(int i) { return thread_data_[i].steal_partition.load(std::memory_order_relaxed); }
void ComputeCoprimes(int N, MaxSizeVector<unsigned>* coprimes) {
for (int i = 1; i <= N; i++) {
@@ -275,8 +264,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
// proportional to num_threads_ and we assume that new work is scheduled at
// a constant rate, so we set spin_count to 5000 / num_threads_. The
// constant was picked based on a fair dice roll, tune it.
const int spin_count =
allow_spinning_ && num_threads_ > 0 ? 5000 / num_threads_ : 0;
const int spin_count = allow_spinning_ && num_threads_ > 0 ? 5000 / num_threads_ : 0;
if (num_threads_ == 1) {
// For num_threads_ == 1 there is no point in going through the expensive
// steal loop. Moreover, since NonEmptyQueueIndex() calls PopBack() on the
@@ -342,9 +330,9 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
unsigned r = Rand(&pt->rand);
// Reduce r into [0, size) range, this utilizes trick from
// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
eigen_plain_assert(all_coprimes_[size - 1].size() < (1<<30));
eigen_plain_assert(all_coprimes_[size - 1].size() < (1 << 30));
unsigned victim = ((uint64_t)r * (uint64_t)size) >> 32;
unsigned index = ((uint64_t) all_coprimes_[size - 1].size() * (uint64_t)r) >> 32;
unsigned index = ((uint64_t)all_coprimes_[size - 1].size() * (uint64_t)r) >> 32;
unsigned inc = all_coprimes_[size - 1][index];
for (unsigned i = 0; i < size; i++) {
@@ -376,10 +364,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
}
// Steals work from any other thread in the pool.
Task GlobalSteal() {
return Steal(0, num_threads_);
}
Task GlobalSteal() { return Steal(0, num_threads_); }
// WaitForWork blocks until new work is available (returns true), or if it is
// time to exit (returns false). Can optionally return a task to execute in t
@@ -477,8 +462,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
// Update the internal state
*state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
// Generate the random output (using the PCG-XSH-RS scheme)
return static_cast<unsigned>((current ^ (current >> 22)) >>
(22 + (current >> 61)));
return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61)));
}
};

View File

@@ -45,8 +45,7 @@ class RunQueue {
eigen_plain_assert((kSize & (kSize - 1)) == 0);
eigen_plain_assert(kSize > 2); // why would you do this?
eigen_plain_assert(kSize <= (64 << 10)); // leave enough space for counter
for (unsigned i = 0; i < kSize; i++)
array_[i].state.store(kEmpty, std::memory_order_relaxed);
for (unsigned i = 0; i < kSize; i++) array_[i].state.store(kEmpty, std::memory_order_relaxed);
}
~RunQueue() { eigen_plain_assert(Size() == 0); }
@@ -57,9 +56,7 @@ class RunQueue {
unsigned front = front_.load(std::memory_order_relaxed);
Elem* e = &array_[front & kMask];
uint8_t s = e->state.load(std::memory_order_relaxed);
if (s != kEmpty ||
!e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire))
return w;
if (s != kEmpty || !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire)) return w;
front_.store(front + 1 + (kSize << 1), std::memory_order_relaxed);
e->w = std::move(w);
e->state.store(kReady, std::memory_order_release);
@@ -72,9 +69,7 @@ class RunQueue {
unsigned front = front_.load(std::memory_order_relaxed);
Elem* e = &array_[(front - 1) & kMask];
uint8_t s = e->state.load(std::memory_order_relaxed);
if (s != kReady ||
!e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire))
return Work();
if (s != kReady || !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire)) return Work();
Work w = std::move(e->w);
e->state.store(kEmpty, std::memory_order_release);
front = ((front - 1) & kMask2) | (front & ~kMask2);
@@ -89,9 +84,7 @@ class RunQueue {
unsigned back = back_.load(std::memory_order_relaxed);
Elem* e = &array_[(back - 1) & kMask];
uint8_t s = e->state.load(std::memory_order_relaxed);
if (s != kEmpty ||
!e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire))
return w;
if (s != kEmpty || !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire)) return w;
back = ((back - 1) & kMask2) | (back & ~kMask2);
back_.store(back, std::memory_order_relaxed);
e->w = std::move(w);
@@ -106,9 +99,7 @@ class RunQueue {
unsigned back = back_.load(std::memory_order_relaxed);
Elem* e = &array_[back & kMask];
uint8_t s = e->state.load(std::memory_order_relaxed);
if (s != kReady ||
!e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire))
return Work();
if (s != kReady || !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire)) return Work();
Work w = std::move(e->w);
e->state.store(kEmpty, std::memory_order_release);
back_.store(back + 1 + (kSize << 1), std::memory_order_relaxed);
@@ -130,9 +121,7 @@ class RunQueue {
Elem* e = &array_[mid & kMask];
uint8_t s = e->state.load(std::memory_order_relaxed);
if (n == 0) {
if (s != kReady || !e->state.compare_exchange_strong(
s, kBusy, std::memory_order_acquire))
continue;
if (s != kReady || !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire)) continue;
start = mid;
} else {
// Note: no need to store temporal kBusy, we exclusively own these
@@ -143,8 +132,7 @@ class RunQueue {
e->state.store(kEmpty, std::memory_order_release);
n++;
}
if (n != 0)
back_.store(start + 1 + (kSize << 1), std::memory_order_relaxed);
if (n != 0) back_.store(start + 1 + (kSize << 1), std::memory_order_relaxed);
return n;
}
@@ -190,7 +178,7 @@ class RunQueue {
// SizeOrNotEmpty returns current queue size; if NeedSizeEstimate is false,
// only whether the size is 0 is guaranteed to be correct.
// Can be called by any thread at any time.
template<bool NeedSizeEstimate>
template <bool NeedSizeEstimate>
unsigned SizeOrNotEmpty() const {
// Emptiness plays critical role in thread pool blocking. So we go to great
// effort to not produce false positives (claim non-empty queue as empty).
@@ -217,8 +205,7 @@ class RunQueue {
}
}
EIGEN_ALWAYS_INLINE
unsigned CalculateSize(unsigned front, unsigned back) const {
EIGEN_ALWAYS_INLINE unsigned CalculateSize(unsigned front, unsigned back) const {
int size = (front & kMask2) - (back & kMask2);
// Fix overflow.
if (size < 0) size += 2 * kSize;

View File

@@ -12,12 +12,10 @@
// Try to come up with a portable way to cancel a thread
#if EIGEN_OS_GNULINUX
#define EIGEN_THREAD_CANCEL(t) \
pthread_cancel(t.native_handle());
#define EIGEN_SUPPORTS_THREAD_CANCELLATION 1
#define EIGEN_THREAD_CANCEL(t) pthread_cancel(t.native_handle());
#define EIGEN_SUPPORTS_THREAD_CANCELLATION 1
#else
#define EIGEN_THREAD_CANCEL(t)
#endif
#endif // EIGEN_CXX11_THREADPOOL_THREAD_CANCEL_H

View File

@@ -27,7 +27,7 @@ struct StlThreadEnvironment {
EnvThread(std::function<void()> f) : thr_(std::move(f)) {}
~EnvThread() { thr_.join(); }
// This function is called when the threadpool is cancelled.
void OnCancel() { }
void OnCancel() {}
private:
std::thread thr_;

View File

@@ -18,7 +18,7 @@
#else
#if ((EIGEN_COMP_GNUC) || __has_feature(cxx_thread_local) || EIGEN_COMP_MSVC )
#if ((EIGEN_COMP_GNUC) || __has_feature(cxx_thread_local) || EIGEN_COMP_MSVC)
#define EIGEN_THREAD_LOCAL static thread_local
#endif
@@ -31,8 +31,8 @@
#endif
// Checks whether C++11's `thread_local` storage duration specifier is
// supported.
#if EIGEN_COMP_CLANGAPPLE && ((EIGEN_COMP_CLANGAPPLE < 8000042) || \
(TARGET_OS_IPHONE && __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_9_0))
#if EIGEN_COMP_CLANGAPPLE && \
((EIGEN_COMP_CLANGAPPLE < 8000042) || (TARGET_OS_IPHONE && __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_9_0))
// Notes: Xcode's clang did not support `thread_local` until version
// 8, and even then not for all iOS < 9.0.
#undef EIGEN_THREAD_LOCAL
@@ -49,8 +49,7 @@
#if __has_include(<android/ndk-version.h>)
#include <android/ndk-version.h>
#endif // __has_include(<android/ndk-version.h>)
#if defined(__ANDROID__) && defined(__clang__) && defined(__NDK_MAJOR__) && \
defined(__NDK_MINOR__) && \
#if defined(__ANDROID__) && defined(__clang__) && defined(__NDK_MAJOR__) && defined(__NDK_MINOR__) && \
((__NDK_MAJOR__ < 12) || ((__NDK_MAJOR__ == 12) && (__NDK_MINOR__ < 1)))
#undef EIGEN_THREAD_LOCAL
#endif
@@ -108,22 +107,18 @@ struct ThreadLocalNoOpRelease {
// Somewhat similar to TBB thread local storage, with similar restrictions:
// https://www.threadingbuildingblocks.org/docs/help/reference/thread_local_storage/enumerable_thread_specific_cls.html
//
template <typename T,
typename Initialize = internal::ThreadLocalNoOpInitialize<T>,
template <typename T, typename Initialize = internal::ThreadLocalNoOpInitialize<T>,
typename Release = internal::ThreadLocalNoOpRelease<T>>
class ThreadLocal {
// We preallocate default constructed elements in MaxSizedVector.
static_assert(std::is_default_constructible<T>::value,
"ThreadLocal data type must be default constructible");
static_assert(std::is_default_constructible<T>::value, "ThreadLocal data type must be default constructible");
public:
explicit ThreadLocal(int capacity)
: ThreadLocal(capacity, internal::ThreadLocalNoOpInitialize<T>(),
internal::ThreadLocalNoOpRelease<T>()) {}
: ThreadLocal(capacity, internal::ThreadLocalNoOpInitialize<T>(), internal::ThreadLocalNoOpRelease<T>()) {}
ThreadLocal(int capacity, Initialize initialize)
: ThreadLocal(capacity, std::move(initialize),
internal::ThreadLocalNoOpRelease<T>()) {}
: ThreadLocal(capacity, std::move(initialize), internal::ThreadLocalNoOpRelease<T>()) {}
ThreadLocal(int capacity, Initialize initialize, Release release)
: initialize_(std::move(initialize)),
@@ -174,8 +169,7 @@ class ThreadLocal {
// free storage. If old value in `filled_records_` is larger than the
// records capacity, it means that some other thread added an element while
// we were traversing lookup table.
int insertion_index =
filled_records_.fetch_add(1, std::memory_order_relaxed);
int insertion_index = filled_records_.fetch_add(1, std::memory_order_relaxed);
if (insertion_index >= capacity_) return SpilledLocal(this_thread);
// At this point it's guaranteed that we can access to

View File

@@ -24,8 +24,7 @@ class ThreadPoolInterface {
// Submits a closure to be run by threads in the range [start, end) in the
// pool.
virtual void ScheduleWithHint(std::function<void()> fn, int /*start*/,
int /*end*/) {
virtual void ScheduleWithHint(std::function<void()> fn, int /*start*/, int /*end*/) {
// Just defer to Schedule in case sub-classes aren't interested in
// overriding this functionality.
Schedule(fn);