// ExtendedAtomicOps-clang-gcc.h

#ifndef UNITY_ATOMIC_FORCE_LOCKFREE_IMPLEMENTATION
#   define UNITY_ATOMIC_FORCE_LOCKFREE_IMPLEMENTATION 1
#endif

namespace detail
{
#if UNITY_ATOMIC_USE_CLANG_ATOMICS && UNITY_ATOMIC_USE_GCC_ATOMICS
#   error Cannot use both Clang and GCC atomic built-ins
#elif UNITY_ATOMIC_USE_CLANG_ATOMICS
#   if !__has_feature(c_atomic) && !__has_extension(c_atomic)
#       error "missing atomic built-in functions"
#   endif
#   define INTERNAL_UNITY_ATOMIC_THREAD_FENCE(memorder) __c11_atomic_thread_fence(memorder)
#   define INTERNAL_UNITY_ATOMIC_LOAD(ptr, memorder) __c11_atomic_load(ptr, memorder)
#   define INTERNAL_UNITY_ATOMIC_STORE(ptr, value, memorder) __c11_atomic_store(ptr, value, memorder)
#   define INTERNAL_UNITY_ATOMIC_EXCHANGE(ptr, value, memorder) __c11_atomic_exchange(ptr, value, memorder)
#   define INTERNAL_UNITY_ATOMIC_COMPARE_EXCHANGE_STRONG(ptr, oldval, newval, success, fail) __c11_atomic_compare_exchange_strong(ptr, oldval, newval, success, fail)
#   define INTERNAL_UNITY_ATOMIC_COMPARE_EXCHANGE_WEAK(ptr, oldval, newval, success, fail) __c11_atomic_compare_exchange_weak(ptr, oldval, newval, success, fail)
#   define INTERNAL_UNITY_ATOMIC_FETCH_ADD(ptr, value, memorder) __c11_atomic_fetch_add(ptr, value, memorder)
#   define INTERNAL_UNITY_ATOMIC_FETCH_SUB(ptr, value, memorder) __c11_atomic_fetch_sub(ptr, value, memorder)
#   define INTERNAL_UNITY_ATOMIC_TYPE(type) _Atomic(type)
#   define INTERNAL_UNITY_ATOMIC_IS_LOCK_FREE(type) __c11_atomic_is_lock_free(sizeof(type))
#elif UNITY_ATOMIC_USE_GCC_ATOMICS
#   if (!PLATFORM_PS4) && ((__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 7))
#       error "__atomic built-in functions not supported on GCC versions older than 4.7"
#   endif
#   if UNITY_ATOMIC_FORCE_LOCKFREE_IMPLEMENTATION
#       if __GCC_ATOMIC_INT_LOCK_FREE + 0 != 2 || __GCC_ATOMIC_LLONG_LOCK_FREE + 0 != 2
#           error "atomic ops are not lock-free for some required data types"
#       endif
#   endif
#   define INTERNAL_UNITY_ATOMIC_THREAD_FENCE(memorder) __atomic_thread_fence(memorder)
#   define INTERNAL_UNITY_ATOMIC_LOAD(ptr, memorder) __atomic_load_n(ptr, memorder)
#   define INTERNAL_UNITY_ATOMIC_STORE(ptr, value, memorder) __atomic_store_n(ptr, value, memorder)
#   define INTERNAL_UNITY_ATOMIC_EXCHANGE(ptr, value, memorder) __atomic_exchange_n(ptr, value, memorder)
#   define INTERNAL_UNITY_ATOMIC_COMPARE_EXCHANGE_STRONG(ptr, oldval, newval, success, fail) __atomic_compare_exchange_n(ptr, oldval, newval, false, success, fail)
#   define INTERNAL_UNITY_ATOMIC_COMPARE_EXCHANGE_WEAK(ptr, oldval, newval, success, fail) __atomic_compare_exchange_n(ptr, oldval, newval, true, success, fail)
#   define INTERNAL_UNITY_ATOMIC_FETCH_ADD(ptr, value, memorder) __atomic_fetch_add(ptr, value, memorder)
#   define INTERNAL_UNITY_ATOMIC_FETCH_SUB(ptr, value, memorder) __atomic_fetch_sub(ptr, value, memorder)
#   define INTERNAL_UNITY_ATOMIC_TYPE(type) type
#   if __GNUC__ >= 5
        // GCC pre-5 did not allow __atomic_always_lock_free in static expressions such as CompileTimeAssert
        // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62024
#       define INTERNAL_UNITY_ATOMIC_IS_LOCK_FREE(type) __atomic_always_lock_free(sizeof(type), 0)
#   else
#       define INTERNAL_UNITY_ATOMIC_IS_LOCK_FREE(type) true
#   endif
#else
#   error One of UNITY_ATOMIC_USE_CLANG_ATOMICS or UNITY_ATOMIC_USE_GCC_ATOMICS must be defined to 1
#endif
inline int MemOrder(memory_order_relaxed_t) { return __ATOMIC_RELAXED; }
inline int MemOrder(memory_order_release_t) { return __ATOMIC_RELEASE; }
inline int MemOrder(memory_order_acquire_t) { return __ATOMIC_ACQUIRE; }
inline int MemOrder(memory_order_acq_rel_t) { return __ATOMIC_ACQ_REL; }
inline int MemOrder(memory_order_seq_cst_t) { return __ATOMIC_SEQ_CST; }
void MemOrder(...); // generate compile error on unsupported mem order types
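// Illustrative sketch (not part of the original header): the overload set above translates the
// strongly-typed memory-order tags used throughout these headers into the compiler's __ATOMIC_*
// constants. The variadic catch-all returns void, so an unsupported tag makes the surrounding
// built-in call ill-formed instead of silently defaulting to some ordering. ExampleMemOrderMapping
// is a hypothetical helper added only to show the mapping.
#if 0
inline void ExampleMemOrderMapping()
{
    int acq = MemOrder(::memory_order_acquire);     // == __ATOMIC_ACQUIRE
    int rel = MemOrder(::memory_order_release);     // == __ATOMIC_RELEASE
    (void)acq; (void)rel;
    // MemOrder(42) would select the void MemOrder(...) overload, and its void result cannot be
    // used as the memorder argument of the built-ins, so the call fails to compile.
}
#endif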
#define INTERNAL_UNITY_ATOMIC_TYPEDEF(nonatomic, atomic) \
    typedef INTERNAL_UNITY_ATOMIC_TYPE(nonatomic) atomic; \
    CompileTimeAssert(!UNITY_ATOMIC_FORCE_LOCKFREE_IMPLEMENTATION || INTERNAL_UNITY_ATOMIC_IS_LOCK_FREE(atomic), #atomic " is not lock-free on this platform")

INTERNAL_UNITY_ATOMIC_TYPEDEF(non_atomic_word, native_atomic_word);
INTERNAL_UNITY_ATOMIC_TYPEDEF(non_atomic_word2, native_atomic_word2);
INTERNAL_UNITY_ATOMIC_TYPEDEF(int, native_atomic_int);

#if UNITY_ATOMIC_FORCE_LOCKFREE_IMPLEMENTATION
CompileTimeAssert(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 + 0, "requires 32bit CAS");
CompileTimeAssert(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 + 0, "requires 64bit CAS");
// we will have special implementation for arm64
#if __SIZEOF_POINTER__ == 8 && !defined(__arm64__)
CompileTimeAssert(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 + 0, "requires 128bit CAS");
#endif
#endif

#undef INTERNAL_UNITY_ATOMIC_TYPEDEF
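// The helpers below bridge the public atomic_word/atomic_word2 types and the native representation the
// built-ins operate on (_Atomic(T) under Clang, plain T under GCC): AtomicPtr yields the pointer the
// built-ins expect, NonAtomicPtr/NonAtomicValue unwrap the plain value, and UnityAtomicValue wraps it back.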
inline native_atomic_word* AtomicPtr(atomic_word* p) { return reinterpret_cast<native_atomic_word*>(p); }
inline volatile native_atomic_word* AtomicPtr(volatile atomic_word* p) { return reinterpret_cast<volatile native_atomic_word*>(p); }
inline native_atomic_word2* AtomicPtr(atomic_word2* p) { return reinterpret_cast<native_atomic_word2*>(&p->v); }
inline volatile native_atomic_word2* AtomicPtr(volatile atomic_word2* p) { return reinterpret_cast<volatile native_atomic_word2*>(&p->v); }

inline non_atomic_word* NonAtomicPtr(atomic_word* v) { return v; }
// same as above: inline non_atomic_word* NonAtomicPtr(non_atomic_word* v) { return v; }
inline non_atomic_word2* NonAtomicPtr(atomic_word2* v) { return &v->v; }
inline non_atomic_word2* NonAtomicPtr(non_atomic_word2* v) { return v; }

inline non_atomic_word NonAtomicValue(atomic_word v) { return v; }
// same as above: inline non_atomic_word NonAtomicValue(non_atomic_word v) { return v; }
inline non_atomic_word2 NonAtomicValue(atomic_word2 v) { return v.v; }
inline non_atomic_word2 NonAtomicValue(non_atomic_word2 v) { return v; }

inline atomic_word UnityAtomicValue(non_atomic_word v) { return v; }
inline atomic_word2 UnityAtomicValue(non_atomic_word2 v) { atomic_word2 r; r.v = v; return r; }

#ifdef UNITY_ATOMIC_INT_OVERLOAD
inline native_atomic_int* AtomicPtr(int* p) { return reinterpret_cast<native_atomic_int*>(p); }
inline volatile native_atomic_int* AtomicPtr(volatile int* p) { return reinterpret_cast<volatile native_atomic_int*>(p); }
inline int* NonAtomicPtr(int* v) { return v; }
inline int NonAtomicValue(int v) { return v; }
inline int UnityAtomicValue(int v) { return v; }
#endif
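// Identity<T>::type is a non-deduced context: the value parameters of the functions below are declared
// through it so that T is deduced from the pointer argument only (e.g. passing a literal 0 for an
// atomic_word does not compete with the atomic_word* argument for the deduction of T).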
template<typename T> struct Identity { typedef T type; };
} // namespace detail
template<typename MemOrder>
static inline void atomic_thread_fence(MemOrder memOrder)
{
    INTERNAL_UNITY_ATOMIC_THREAD_FENCE(detail::MemOrder(memOrder));
}

template<typename T, typename MemOrder>
static inline T atomic_load_explicit(const volatile T* p, MemOrder memOrder)
{
    return detail::UnityAtomicValue(INTERNAL_UNITY_ATOMIC_LOAD(detail::AtomicPtr(const_cast<T*>(p)), detail::MemOrder(memOrder)));
}

template<typename T, typename MemOrder>
static inline void atomic_store_explicit(volatile T* p, typename detail::Identity<T>::type v, MemOrder memOrder)
{
    INTERNAL_UNITY_ATOMIC_STORE(detail::AtomicPtr(p), detail::NonAtomicValue(v), detail::MemOrder(memOrder));
}

template<typename T, typename MemOrder>
static inline T atomic_exchange_explicit(volatile T* p, typename detail::Identity<T>::type v, MemOrder memOrder)
{
    return detail::UnityAtomicValue(INTERNAL_UNITY_ATOMIC_EXCHANGE(detail::AtomicPtr(p), detail::NonAtomicValue(v), detail::MemOrder(memOrder)));
}

template<typename T, typename MemOrderSuccess, typename MemOrderFail>
static inline bool atomic_compare_exchange_weak_explicit(volatile T* p, T* oldval, typename detail::Identity<T>::type newval,
    MemOrderSuccess memOrderSuccess, MemOrderFail memOrderFail)
{
    return INTERNAL_UNITY_ATOMIC_COMPARE_EXCHANGE_WEAK(detail::AtomicPtr(p), detail::NonAtomicPtr(oldval), detail::NonAtomicValue(newval),
        detail::MemOrder(memOrderSuccess), detail::MemOrder(memOrderFail));
}

template<typename T, typename MemOrderSuccess, typename MemOrderFail>
static inline bool atomic_compare_exchange_strong_explicit(volatile T* p, T* oldval, typename detail::Identity<T>::type newval,
    MemOrderSuccess memOrderSuccess, MemOrderFail memOrderFail)
{
    return INTERNAL_UNITY_ATOMIC_COMPARE_EXCHANGE_STRONG(detail::AtomicPtr(p), detail::NonAtomicPtr(oldval), detail::NonAtomicValue(newval),
        detail::MemOrder(memOrderSuccess), detail::MemOrder(memOrderFail));
}

template<typename T, typename MemOrder>
static inline T atomic_fetch_add_explicit(volatile T* p, typename detail::Identity<T>::type v, MemOrder memOrder)
{
    return detail::UnityAtomicValue(INTERNAL_UNITY_ATOMIC_FETCH_ADD(detail::AtomicPtr(p), detail::NonAtomicValue(v), detail::MemOrder(memOrder)));
}

template<typename T, typename MemOrder>
static inline T atomic_fetch_sub_explicit(volatile T* p, typename detail::Identity<T>::type v, MemOrder memOrder)
{
    return detail::UnityAtomicValue(INTERNAL_UNITY_ATOMIC_FETCH_SUB(detail::AtomicPtr(p), detail::NonAtomicValue(v), detail::MemOrder(memOrder)));
}
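// Illustrative usage sketch (not part of the original header): a typical retry loop built from the
// generic wrappers above. ExampleAtomicDouble is a hypothetical helper; atomic_word is the word-sized
// type these headers already use.
#if 0
static inline atomic_word ExampleAtomicDouble(volatile atomic_word* p)
{
    atomic_word oldval = atomic_load_explicit(p, ::memory_order_relaxed);
    // the weak CAS may fail spuriously, which is fine inside a retry loop;
    // on failure it refreshes oldval with the value it actually observed
    while (!atomic_compare_exchange_weak_explicit(p, &oldval, oldval * 2,
        ::memory_order_seq_cst, ::memory_order_relaxed))
    {
    }
    return oldval;  // the value that was successfully replaced
}
#endif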
/*
 * extensions
 */

static inline void atomic_retain(volatile int* p)
{
    atomic_fetch_add_explicit(p, 1, ::memory_order_relaxed);
}

static inline bool atomic_release(volatile int* p)
{
    // Both paths here should be correct on any platform.
    // On architectures where a read-modify-write with memory_order_acq_rel is more expensive than one with
    // memory_order_release, the idea is to use a standalone memory_order_acquire fence instead, but only when
    // the reference count drops to 0. Only then is acquire/release synchronization needed, to make sure that
    // everything prior to atomic_release happens-before running the destructor.
#if defined(__arm__) || defined(__arm64__)
    bool res = atomic_fetch_sub_explicit(p, 1, ::memory_order_release) == 1;
    if (res)
    {
        atomic_thread_fence(::memory_order_acquire);
    }
    return res;
#else
    return atomic_fetch_sub_explicit(p, 1, ::memory_order_acq_rel) == 1;
#endif
}
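// Illustrative usage sketch (not part of the original header): how the retain/release pair above is
// typically consumed by a hypothetical intrusively refcounted type. The relaxed increment is enough for
// retain; atomic_release returns true only for the final reference, at which point the release decrement
// plus the acquire fence (see the comment inside atomic_release) ensure every prior write to the object
// happens-before the destructor runs.
#if 0
struct ExampleRefCounted
{
    volatile int refCount;
    ExampleRefCounted() : refCount(1) {}
};

static inline void ExampleRetain(ExampleRefCounted* obj)
{
    atomic_retain(&obj->refCount);
}

static inline void ExampleRelease(ExampleRefCounted* obj)
{
    if (atomic_release(&obj->refCount))
        delete obj;     // last reference dropped; safe to destroy
}
#endif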
#undef INTERNAL_UNITY_ATOMIC_THREAD_FENCE
#undef INTERNAL_UNITY_ATOMIC_LOAD
#undef INTERNAL_UNITY_ATOMIC_STORE
#undef INTERNAL_UNITY_ATOMIC_EXCHANGE
#undef INTERNAL_UNITY_ATOMIC_COMPARE_EXCHANGE_STRONG
#undef INTERNAL_UNITY_ATOMIC_COMPARE_EXCHANGE_WEAK
#undef INTERNAL_UNITY_ATOMIC_FETCH_ADD
#undef INTERNAL_UNITY_ATOMIC_FETCH_SUB
#undef INTERNAL_UNITY_ATOMIC_TYPE
#undef INTERNAL_UNITY_ATOMIC_IS_LOCK_FREE
// The only way to get atomic 128-bit memory accesses on ARM64 is to use ld(r|a)ex/st(r|a)ex in a loop.
// Going forward we want to get rid of most of this by undefining ATOMIC_HAS_DCAS and providing a custom
// implementation of AtomicQueue and friends.
#if __SIZEOF_POINTER__ == 8 && (defined(__arm64__) || defined(__aarch64__))

static inline atomic_word2 atomic_load_explicit(const volatile atomic_word2* p, memory_order_relaxed_t)
{
    non_atomic_word2 v; volatile non_atomic_word2* pv = (volatile non_atomic_word2*)&p->v;
    do
    {
        v = __builtin_arm_ldrex(pv);
    }
    while (__builtin_arm_strex(v, pv));
    return (atomic_word2) {.v = v};
}

static inline atomic_word2 atomic_load_explicit(const volatile atomic_word2* p, memory_order_acquire_t)
{
    non_atomic_word2 v; volatile non_atomic_word2* pv = (volatile non_atomic_word2*)&p->v;
    do
    {
        v = __builtin_arm_ldaex(pv);
    }
    while (__builtin_arm_strex(v, pv));
    return (atomic_word2) {.v = v};
}

static inline void atomic_store_explicit(volatile atomic_word2* p, atomic_word2 v, memory_order_relaxed_t)
{
    non_atomic_word2 tmp; volatile non_atomic_word2* pv = &p->v;
    do
    {
        tmp = __builtin_arm_ldrex(pv);
    }
    while (__builtin_arm_strex(v.v, pv));
}

static inline void atomic_store_explicit(volatile atomic_word2* p, atomic_word2 v, memory_order_release_t)
{
    non_atomic_word2 tmp; volatile non_atomic_word2* pv = &p->v;
    do
    {
        tmp = __builtin_arm_ldrex(pv);
    }
    while (__builtin_arm_stlex(v.v, pv));
}

static inline atomic_word2 atomic_exchange_explicit(volatile atomic_word2* p, atomic_word2 val, memory_order_acq_rel_t)
{
    non_atomic_word2 ret; volatile non_atomic_word2* pv = &p->v;
    do
    {
        ret = __builtin_arm_ldaex(pv);
    }
    while (__builtin_arm_stlex(val.v, pv));
    return (atomic_word2) {.v = ret};
}
// The story behind this: in the arm64 asm impl header we had overloads for memory_order_acquire_t,
// memory_order_release_t and int, with the int overload being taken in all other cases - it was the
// memory_order_acq_rel_t impl below. Now that we have moved them here, the int overload "loses" to the
// template above, so it was never taken (and that is why we need these explicit overloads).
// As for seq_cst being the same as acq_rel:
// first of all, it was the case for the asm impl (and it worked for quite some time);
// second, it seems Apple itself uses ldaxr/stlxr in that case (without an extra dmb) -
// this is the case both in some Apple open-source code and in the asm generated for OSAtomicAdd32Barrier and friends.
#define COMPARE_EXCHANGE_IMPL(LOAD_FUNC, STORE_FUNC) \
    const non_atomic_word2 cmp = oldval->v; volatile non_atomic_word2* pv = &p->v; bool success = false; \
    do \
    { \
        non_atomic_word2 cur = oldval->v = LOAD_FUNC(pv); \
        success = (cur == cmp); \
        if (!success) \
        { \
            __builtin_arm_clrex(); \
            break; \
        } \
    } \
    while (STORE_FUNC(newval.v, pv)); \
    return success;
static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, memory_order_acquire_t, memory_order_relaxed_t)
{
    COMPARE_EXCHANGE_IMPL(__builtin_arm_ldaex, __builtin_arm_strex);
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, memory_order_release_t, memory_order_relaxed_t)
{
    COMPARE_EXCHANGE_IMPL(__builtin_arm_ldrex, __builtin_arm_stlex);
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, memory_order_acq_rel_t, memory_order_relaxed_t)
{
    COMPARE_EXCHANGE_IMPL(__builtin_arm_ldaex, __builtin_arm_stlex);
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, memory_order_seq_cst_t, memory_order_relaxed_t)
{
    COMPARE_EXCHANGE_IMPL(__builtin_arm_ldaex, __builtin_arm_stlex);
}

#undef COMPARE_EXCHANGE_IMPL
#endif // __SIZEOF_POINTER__ == 8 && (defined(__arm64__) || defined(__aarch64__))
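// Illustrative usage sketch (not part of the original header): the double-word strong CAS overloads above
// (or the generic template on other 64-bit targets) are typically driven by a loop like this one, which
// swaps the whole pair stored in an atomic_word2. Only the .v member already used by this file is assumed;
// how the two words are packed into it is left to the caller.
#if 0
static inline void ExampleDcasStore(volatile atomic_word2* p, non_atomic_word2 newPacked)
{
    atomic_word2 expected = atomic_load_explicit(p, ::memory_order_relaxed);
    atomic_word2 desired;
    desired.v = newPacked;
    // on failure the strong CAS refreshes 'expected' with the pair currently stored, then we retry
    while (!atomic_compare_exchange_strong_explicit(p, &expected, desired,
        ::memory_order_acq_rel, ::memory_order_relaxed))
    {
    }
}
#endif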
// When implementing atomic operations in an ARM-specific way we need to take care of the armv7/armv8 differences:
// armv8: has ldaex/stlex, which add acquire/release semantics to the exclusive access itself
// armv7: we need to insert the fence ourselves
#if defined(__arm__) || defined(__arm64__) || defined(__aarch64__)
#   if defined(__arm64__) || defined(__aarch64__)
#       define UNITY_ATOMIC_ARMV7_DMB_ISH
#       define UNITY_ATOMIC_ARMV8_LDAEX __builtin_arm_ldaex
#       define UNITY_ATOMIC_ARMV8_STLEX __builtin_arm_stlex
#   else
#       define UNITY_ATOMIC_ARMV7_DMB_ISH __builtin_arm_dmb(11);
#       define UNITY_ATOMIC_ARMV8_LDAEX __builtin_arm_ldrex
#       define UNITY_ATOMIC_ARMV8_STLEX __builtin_arm_strex
#   endif
#endif
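// Illustrative sketch (not part of the original header): how the three macros above are presumably meant
// to be combined - their real users live in the ARM-specific ExtendedAtomicOps headers. On armv8 the
// ordering comes from ldaex/stlex themselves and the dmb macro expands to nothing; on armv7 plain
// ldrex/strex are used and the explicit dmb ish barriers supply acquire/release ordering around the loop.
#if 0
static inline int ExampleExchangeAcqRel(volatile int* p, int newval)
{
    int oldval;
    UNITY_ATOMIC_ARMV7_DMB_ISH                      // armv7: release barrier before the store
    do
    {
        oldval = UNITY_ATOMIC_ARMV8_LDAEX(p);       // armv8: load-acquire exclusive
    }
    while (UNITY_ATOMIC_ARMV8_STLEX(newval, p));    // armv8: store-release exclusive
    UNITY_ATOMIC_ARMV7_DMB_ISH                      // armv7: acquire barrier after the load
    return oldval;
}
#endif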