// ExtendedAtomicOps-arm64.h

#define ASM_DMB_ISH "dmb ish\n\t"

#if defined(__ARM_ARCH_7S__)
// this is sufficient for Swift processors
#   define ASM_REL "dmb ishst\n\t"
#else
#   define ASM_REL "dmb ish\n\t"
#endif

static inline void atomic_thread_fence(memory_order_relaxed_t)
{
}

static inline void atomic_thread_fence(memory_order_acquire_t)
{
    __asm__ __volatile__ ("dmb ld\n\t" : : : "memory");
}

static inline void atomic_thread_fence(memory_order_release_t)
{
    __asm__ __volatile__ (ASM_REL : : : "memory");
}

static inline void atomic_thread_fence(memory_order_acq_rel_t)
{
    __asm__ __volatile__ (ASM_DMB_ISH : : : "memory");
}

static inline void atomic_thread_fence(int /* memory_order_seq_cst_t */)
{
    __asm__ __volatile__ (ASM_DMB_ISH : : : "memory");
}
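// Illustrative note (not part of the original source): the fence overloads are
// selected by tag dispatch on the memory-order argument, e.g.
//
//   atomic_thread_fence(memory_order_acquire);   // acquire overload -> "dmb ld"
//   atomic_thread_fence(memory_order_release);   // release overload -> ASM_REL
//
// assuming the memory_order_* tag constants declared alongside this header
// (memory_order_relaxed, memory_order_acquire and memory_order_release are used below).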
#define ATOMIC_LOAD(opc) \
    atomic_word res; \
    __asm__ __volatile__ \
    ( \
        opc " %0, %1\n\t" \
        : "=r" (res) \
        : "m" (*p) \
    ); \
    return res;
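// Expansion sketch (illustrative, not part of the original source): ATOMIC_LOAD("ldar"),
// as used by the native-word acquire load below, expands to roughly
//
//   atomic_word res;
//   __asm__ __volatile__ ("ldar %0, %1\n\t" : "=r" (res) : "m" (*p));
//   return res;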
/*
 * int support
 */
static inline int atomic_load_explicit(const volatile int* p, memory_order_relaxed_t)
{
    int res;
    __asm__ __volatile__
    (
        "ldr %w0, %1\n\t"
        : "=r" (res)
        : "m" (*p)
    );
    return res;
}

static inline int atomic_load_explicit(const volatile int* p, memory_order_acquire_t)
{
    int res;
    __asm__ __volatile__
    (
        "ldar %w0, %1\n\t"
        : "=r" (res)
        : "m" (*p)
    );
    return res;
}

static inline int atomic_load_explicit(const volatile int* p, int /* memory_order_seq_cst_t */)
{
    int res;
    __asm__ __volatile__
    (
        "ldar %w0, %1\n\t"
        : "=r" (res)
        : "m" (*p)
    );
    return res;
}

/*
 * native word support
 */
static inline atomic_word atomic_load_explicit(const volatile atomic_word* p, memory_order_relaxed_t)
{
    ATOMIC_LOAD("ldr")
}

static inline atomic_word atomic_load_explicit(const volatile atomic_word* p, memory_order_acquire_t)
{
    ATOMIC_LOAD("ldar")
}

static inline atomic_word atomic_load_explicit(const volatile atomic_word* p, int /* memory_order_seq_cst_t */)
{
    ATOMIC_LOAD("ldar")
}

#define ATOMIC_STORE(opc) \
    __asm__ __volatile__ \
    ( \
        opc " %1, %0\n\t" \
        : "=m" (*p) \
        : "r" (v) \
        : "memory" \
    );

/*
 * int support
 */
static inline void atomic_store_explicit(volatile int* p, int v, memory_order_relaxed_t)
{
    __asm__ __volatile__
    (
        "str %w1, %0\n\t"
        : "=m" (*p)
        : "r" (v)
        : "memory"
    );
}

static inline void atomic_store_explicit(volatile int* p, int v, memory_order_release_t)
{
    __asm__ __volatile__
    (
        "stlr %w1, %0\n\t"
        : "=m" (*p)
        : "r" (v)
        : "memory"
    );
}

static inline void atomic_store_explicit(volatile int* p, int v, int /* memory_order_seq_cst_t */)
{
    __asm__ __volatile__
    (
        "stlr %w1, %0\n\t"
        : "=m" (*p)
        : "r" (v)
        : "memory"
    );
}

/*
 * native word support
 */
static inline void atomic_store_explicit(volatile atomic_word* p, atomic_word v, memory_order_relaxed_t)
{
    ATOMIC_STORE("str")
}

static inline void atomic_store_explicit(volatile atomic_word* p, atomic_word v, memory_order_release_t)
{
    ATOMIC_STORE("stlr")
}

static inline void atomic_store_explicit(volatile atomic_word* p, atomic_word v, int /* memory_order_seq_cst_t */)
{
    ATOMIC_STORE("stlr")
}

#define ATOMIC_PFIX_int         "%w"
#define ATOMIC_PFIX_atomic_word "%"
#define ATOMIC_PFIX(WORD)       ATOMIC_PFIX_##WORD
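// Illustrative note (not part of the original source): ATOMIC_PFIX selects the register
// width used by the exclusive-access templates below. ATOMIC_PFIX(int) expands to "%w"
// (32-bit W registers) and ATOMIC_PFIX(atomic_word) to "%" (64-bit X registers), so the
// load in ATOMIC_XCHG becomes "ldxr %w2, [%4]" for int and "ldxr %2, [%4]" for atomic_word.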
#define ATOMIC_XCHG(WORD, LD, ST) \
    atomic_word res; \
    atomic_word success; \
    __asm__ __volatile__ \
    ( \
        "0:\n\t" \
        LD " " ATOMIC_PFIX(WORD) "2, [%4]\n\t" \
        ST " %w0, " ATOMIC_PFIX(WORD) "3, [%4]\n\t" \
        "cbnz %w0, 0b\n\t" \
        : "=&r" (success), "+m" (*p), "=&r" (res) \
        : "r" (v), "r" (p) \
        : "memory" \
    ); \
    return res;

/*
 * int support
 */
static inline int atomic_exchange_explicit(volatile int* p, int v, memory_order_relaxed_t)
{
    ATOMIC_XCHG(int, "ldxr", "stxr")
}

static inline int atomic_exchange_explicit(volatile int* p, int v, memory_order_acquire_t)
{
    ATOMIC_XCHG(int, "ldaxr", "stxr")
}

static inline int atomic_exchange_explicit(volatile int* p, int v, memory_order_release_t)
{
    ATOMIC_XCHG(int, "ldxr", "stlxr")
}

static inline int atomic_exchange_explicit(volatile int* p, int v, memory_order_acq_rel_t)
{
    ATOMIC_XCHG(int, "ldaxr", "stlxr")
}

static inline int atomic_exchange_explicit(volatile int* p, int v, int /* memory_order_seq_cst_t */)
{
    ATOMIC_XCHG(int, "ldaxr", "stlxr")
}

/*
 * native word support
 */
static inline atomic_word atomic_exchange_explicit(volatile atomic_word* p, atomic_word v, memory_order_relaxed_t)
{
    ATOMIC_XCHG(atomic_word, "ldxr", "stxr")
}

static inline atomic_word atomic_exchange_explicit(volatile atomic_word* p, atomic_word v, memory_order_acquire_t)
{
    ATOMIC_XCHG(atomic_word, "ldaxr", "stxr")
}

static inline atomic_word atomic_exchange_explicit(volatile atomic_word* p, atomic_word v, memory_order_release_t)
{
    ATOMIC_XCHG(atomic_word, "ldxr", "stlxr")
}

static inline atomic_word atomic_exchange_explicit(volatile atomic_word* p, atomic_word v, memory_order_acq_rel_t)
{
    ATOMIC_XCHG(atomic_word, "ldaxr", "stlxr")
}

static inline atomic_word atomic_exchange_explicit(volatile atomic_word* p, atomic_word v, int /* memory_order_seq_cst_t */)
{
    ATOMIC_XCHG(atomic_word, "ldaxr", "stlxr")
}

// atomic_compare_exchange_weak_explicit: can fail spuriously even if *p == *oldval
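// Usage sketch (illustrative, not part of the original header): callers are expected to
// wrap the weak variant in a retry loop, since it may fail spuriously:
//
//   int expected = atomic_load_explicit(&value, memory_order_relaxed);
//   while (!atomic_compare_exchange_weak_explicit(&value, &expected, expected + 1,
//                                                 memory_order_acquire, memory_order_relaxed))
//   {
//       // on failure, expected has been refreshed with the current *p; just retry
//   }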
#undef ATOMIC_CMP_XCHG
#define ATOMIC_CMP_XCHG(WORD, LD, ST) \
    atomic_word res; \
    atomic_word failure = 1; \
    __asm__ __volatile__ \
    ( \
        LD " " ATOMIC_PFIX(WORD) "2, [%4] \n\t" \
        "cmp " ATOMIC_PFIX(WORD) "2, " ATOMIC_PFIX(WORD) "5 \n\t" \
        "b.ne 1f \n\t" \
        ST " %w0, " ATOMIC_PFIX(WORD) "3, [%4] \n\t" \
        "1: \n\t" \
        "clrex \n\t" \
        : "+&r" (failure), "+m" (*p), "=&r" (res) \
        : "r" (newval), "r" (p), "r" (*oldval) \
        : "cc", "memory" \
    ); \
    *oldval = res; \
    return (failure == 0);

/*
 * int support
 */
static inline bool atomic_compare_exchange_weak_explicit(volatile int* p, int *oldval, int newval, memory_order_relaxed_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(int, "ldxr", "stxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile int* p, int *oldval, int newval, memory_order_acquire_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(int, "ldaxr", "stxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile int* p, int *oldval, int newval, memory_order_release_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(int, "ldxr", "stlxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile int* p, int *oldval, int newval, memory_order_acq_rel_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(int, "ldaxr", "stlxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile int* p, int *oldval, int newval, int /* memory_order_seq_cst_t */, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(int, "ldaxr", "stlxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile int* p, int *oldval, int newval, memory_order_acquire_t, memory_order_acquire_t)
{
    ATOMIC_CMP_XCHG(int, "ldaxr", "stxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile int* p, int *oldval, int newval, memory_order_release_t, memory_order_release_t)
{
    ATOMIC_CMP_XCHG(int, "ldxr", "stlxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile int* p, int *oldval, int newval, memory_order_acq_rel_t, memory_order_acq_rel_t)
{
    ATOMIC_CMP_XCHG(int, "ldaxr", "stlxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile int* p, int *oldval, int newval, int /* memory_order_seq_cst_t */, int /* memory_order_seq_cst_t */)
{
    ATOMIC_CMP_XCHG(int, "ldaxr", "stlxr")
}

/*
 * native word support
 */
static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_relaxed_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(atomic_word, "ldxr", "stxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acquire_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(atomic_word, "ldaxr", "stxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_release_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(atomic_word, "ldxr", "stlxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acq_rel_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(atomic_word, "ldaxr", "stlxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, int /* memory_order_seq_cst_t */, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(atomic_word, "ldaxr", "stlxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acquire_t, memory_order_acquire_t)
{
    ATOMIC_CMP_XCHG(atomic_word, "ldaxr", "stxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_release_t, memory_order_release_t)
{
    ATOMIC_CMP_XCHG(atomic_word, "ldxr", "stlxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acq_rel_t, memory_order_acq_rel_t)
{
    ATOMIC_CMP_XCHG(atomic_word, "ldaxr", "stlxr")
}

static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, int /* memory_order_seq_cst_t */, int /* memory_order_seq_cst_t */)
{
    ATOMIC_CMP_XCHG(atomic_word, "ldaxr", "stlxr")
}

// atomic_compare_exchange_strong_explicit: does loop and only returns false if *p != *oldval
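// Usage sketch (illustrative, not part of the original header): the strong variant loops
// internally on spurious store failures, so a single call suffices when only the
// "value really differed" outcome matters, e.g. a one-shot claim of a flag:
//
//   int expected = 0;
//   bool claimed = atomic_compare_exchange_strong_explicit(&flag, &expected, 1,
//                                                          memory_order_acquire, memory_order_relaxed);
//   // claimed == false only if flag was already non-zero; expected then holds its value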
#undef ATOMIC_CMP_XCHG
#define ATOMIC_CMP_XCHG(WORD, LD, ST) \
    atomic_word res; \
    atomic_word failure = 1; \
    __asm__ __volatile__ \
    ( \
        "0: \n\t" \
        "mov %w0, #1 \n\t" /* reset failure each loop */ \
        LD " " ATOMIC_PFIX(WORD) "2, [%4] \n\t" \
        "cmp " ATOMIC_PFIX(WORD) "2, " ATOMIC_PFIX(WORD) "5 \n\t" \
        "b.ne 1f \n\t" \
        ST " %w0, " ATOMIC_PFIX(WORD) "3, [%4] \n\t" \
        "cbnz %w0, 0b \n\t" \
        "1: \n\t" \
        "clrex \n\t" \
        : "+&r" (failure), "+m" (*p), "=&r" (res) \
        : "r" (newval), "r" (p), "r" (*oldval) \
        : "cc", "memory" \
    ); \
    *oldval = res; \
    return (failure == 0);

/*
 * int support
 */
static inline bool atomic_compare_exchange_strong_explicit(volatile int* p, int *oldval, int newval, memory_order_relaxed_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(int, "ldxr", "stxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile int* p, int *oldval, int newval, memory_order_acquire_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(int, "ldaxr", "stxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile int* p, int *oldval, int newval, memory_order_release_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(int, "ldxr", "stlxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile int* p, int *oldval, int newval, memory_order_acq_rel_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(int, "ldaxr", "stlxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile int* p, int *oldval, int newval, int /* memory_order_seq_cst_t */, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(int, "ldaxr", "stlxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile int* p, int *oldval, int newval, memory_order_acquire_t, memory_order_acquire_t)
{
    ATOMIC_CMP_XCHG(int, "ldaxr", "stxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile int* p, int *oldval, int newval, memory_order_release_t, memory_order_release_t)
{
    ATOMIC_CMP_XCHG(int, "ldxr", "stlxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile int* p, int *oldval, int newval, memory_order_acq_rel_t, memory_order_acq_rel_t)
{
    ATOMIC_CMP_XCHG(int, "ldaxr", "stlxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile int* p, int *oldval, int newval, int /* memory_order_seq_cst_t */, int /* memory_order_seq_cst_t */)
{
    ATOMIC_CMP_XCHG(int, "ldaxr", "stlxr")
}

/*
 * native word support
 */
static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_relaxed_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(atomic_word, "ldxr", "stxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acquire_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(atomic_word, "ldaxr", "stxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_release_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(atomic_word, "ldxr", "stlxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acq_rel_t, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(atomic_word, "ldaxr", "stlxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, int /* memory_order_seq_cst_t */, memory_order_relaxed_t)
{
    ATOMIC_CMP_XCHG(atomic_word, "ldaxr", "stlxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acquire_t, memory_order_acquire_t)
{
    ATOMIC_CMP_XCHG(atomic_word, "ldaxr", "stxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_release_t, memory_order_release_t)
{
    ATOMIC_CMP_XCHG(atomic_word, "ldxr", "stlxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, memory_order_acq_rel_t, memory_order_acq_rel_t)
{
    ATOMIC_CMP_XCHG(atomic_word, "ldaxr", "stlxr")
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word* p, atomic_word *oldval, atomic_word newval, int /* memory_order_seq_cst_t */, int /* memory_order_seq_cst_t */)
{
    ATOMIC_CMP_XCHG(atomic_word, "ldaxr", "stlxr")
}

#define ATOMIC_OP(WORD, LD, ST, OP) \
    long long res, tmp; \
    int success; \
    __asm__ __volatile__ \
    ( \
        "0: \n\t" \
        LD " " ATOMIC_PFIX(WORD) "2, [%5] \n\t" \
        OP " " ATOMIC_PFIX(WORD) "3, " ATOMIC_PFIX(WORD) "2, " ATOMIC_PFIX(WORD) "4 \n\t" \
        ST " %w0, " ATOMIC_PFIX(WORD) "3, [%5] \n\t" \
        "cbnz %w0, 0b \n\t" \
        : "=&r" (success), "+m" (*p), "=&r" (res), "=&r" (tmp) \
        : "Ir" ((long long) v), "r" (p) \
        : "cc", "memory" \
    ); \
    return (WORD) res;
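// Expansion sketch (illustrative, not part of the original source): ATOMIC_OP(int, "ldaxr", "stlxr", "add"),
// as used by the acq_rel/seq_cst int fetch-add below, becomes the usual LL/SC loop:
//
//   0:  ldaxr  %w2, [%5]        // load-acquire exclusive old value
//       add    %w3, %w2, %w4    // tmp = old + v
//       stlxr  %w0, %w3, [%5]   // store-release exclusive, %w0 == 0 on success
//       cbnz   %w0, 0b          // retry if the exclusive store failed
//
// and the old value (res) is returned to the caller.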
/*
 * int support
 */
static inline int atomic_fetch_add_explicit(volatile int* p, int v, memory_order_relaxed_t)
{
    ATOMIC_OP(int, "ldxr", "stxr", "add")
}

static inline int atomic_fetch_add_explicit(volatile int* p, int v, memory_order_acquire_t)
{
    ATOMIC_OP(int, "ldaxr", "stxr", "add")
}

static inline int atomic_fetch_add_explicit(volatile int* p, int v, memory_order_release_t)
{
    ATOMIC_OP(int, "ldxr", "stlxr", "add")
}

static inline int atomic_fetch_add_explicit(volatile int* p, int v, memory_order_acq_rel_t)
{
    ATOMIC_OP(int, "ldaxr", "stlxr", "add")
}

static inline int atomic_fetch_add_explicit(volatile int* p, int v, int /* memory_order_seq_cst_t */)
{
    ATOMIC_OP(int, "ldaxr", "stlxr", "add")
}

/*
 * native word support
 */
static inline atomic_word atomic_fetch_add_explicit(volatile atomic_word* p, atomic_word v, memory_order_relaxed_t)
{
    ATOMIC_OP(atomic_word, "ldxr", "stxr", "add")
}

static inline atomic_word atomic_fetch_add_explicit(volatile atomic_word* p, atomic_word v, memory_order_acquire_t)
{
    ATOMIC_OP(atomic_word, "ldaxr", "stxr", "add")
}

static inline atomic_word atomic_fetch_add_explicit(volatile atomic_word* p, atomic_word v, memory_order_release_t)
{
    ATOMIC_OP(atomic_word, "ldxr", "stlxr", "add")
}

static inline atomic_word atomic_fetch_add_explicit(volatile atomic_word* p, atomic_word v, memory_order_acq_rel_t)
{
    ATOMIC_OP(atomic_word, "ldaxr", "stlxr", "add")
}

static inline atomic_word atomic_fetch_add_explicit(volatile atomic_word* p, atomic_word v, int /* memory_order_seq_cst_t */)
{
    ATOMIC_OP(atomic_word, "ldaxr", "stlxr", "add")
}

/*
 * int support
 */
static inline int atomic_fetch_sub_explicit(volatile int* p, int v, memory_order_relaxed_t)
{
    ATOMIC_OP(int, "ldxr", "stxr", "sub")
}

static inline int atomic_fetch_sub_explicit(volatile int* p, int v, memory_order_acquire_t)
{
    ATOMIC_OP(int, "ldaxr", "stxr", "sub")
}

static inline int atomic_fetch_sub_explicit(volatile int* p, int v, memory_order_release_t)
{
    ATOMIC_OP(int, "ldxr", "stlxr", "sub")
}

static inline int atomic_fetch_sub_explicit(volatile int* p, int v, memory_order_acq_rel_t)
{
    ATOMIC_OP(int, "ldaxr", "stlxr", "sub")
}

static inline int atomic_fetch_sub_explicit(volatile int* p, int v, int /* memory_order_seq_cst_t */)
{
    ATOMIC_OP(int, "ldaxr", "stlxr", "sub")
}

/*
 * native word support
 */
static inline atomic_word atomic_fetch_sub_explicit(volatile atomic_word* p, atomic_word v, memory_order_relaxed_t)
{
    ATOMIC_OP(atomic_word, "ldxr", "stxr", "sub")
}

static inline atomic_word atomic_fetch_sub_explicit(volatile atomic_word* p, atomic_word v, memory_order_acquire_t)
{
    ATOMIC_OP(atomic_word, "ldaxr", "stxr", "sub")
}

static inline atomic_word atomic_fetch_sub_explicit(volatile atomic_word* p, atomic_word v, memory_order_release_t)
{
    ATOMIC_OP(atomic_word, "ldxr", "stlxr", "sub")
}

static inline atomic_word atomic_fetch_sub_explicit(volatile atomic_word* p, atomic_word v, memory_order_acq_rel_t)
{
    ATOMIC_OP(atomic_word, "ldaxr", "stlxr", "sub")
}

static inline atomic_word atomic_fetch_sub_explicit(volatile atomic_word* p, atomic_word v, int /* memory_order_seq_cst_t */)
{
    ATOMIC_OP(atomic_word, "ldaxr", "stlxr", "sub")
}

/*
 * extensions
 */
static inline void atomic_retain(volatile int* p)
{
    atomic_fetch_add_explicit(p, 1, memory_order_relaxed);
}

static inline bool atomic_release(volatile int* p)
{
    bool res = atomic_fetch_sub_explicit(p, 1, memory_order_release) == 1;
    if (res)
    {
        atomic_thread_fence(memory_order_acquire);
    }
    return res;
}
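// Usage sketch (illustrative, not part of the original header): a minimal reference-count
// pattern on top of atomic_retain/atomic_release, with a hypothetical object holding an
// int refcount field:
//
//   atomic_retain(&obj->refcount);          // take a reference (relaxed increment)
//   ...
//   if (atomic_release(&obj->refcount))     // true only when the last reference is dropped;
//       destroy(obj);                       // the release decrement plus the acquire fence
//                                           // make earlier writes by other owners visible
//                                           // before destruction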
/*
 * double word
 */
// Note: the only way to get atomic 128-bit memory accesses on ARM64 is to use ldxp/stxp with a loop
// (ldxp and stxp instructions are not guaranteed to appear atomic)
static inline atomic_word2 atomic_load_explicit(const volatile atomic_word2* p, memory_order_relaxed_t)
{
    atomic_word2 v;
    atomic_word success;
    __asm__ __volatile__
    (
        "0:\n\t"
        "ldxp\t%1, %2, [%3]\n\t"
        "stxp\t%w0, %1, %2, [%3]\n\t"
        "cbnz\t%w0, 0b\n\t"
        : "=&r" (success), "=&r" (v.lo), "=&r" (v.hi)
        : "r" (p)
    );
    return v;
}

static inline atomic_word2 atomic_load_explicit(const volatile atomic_word2* p, memory_order_acquire_t)
{
    atomic_word2 v;
    atomic_word success;
    __asm__ __volatile__
    (
        "0:\n\t"
        "ldaxp\t%1, %2, [%3]\n\t"
        "stxp\t%w0, %1, %2, [%3]\n\t"
        "cbnz\t%w0, 0b\n\t"
        : "=&r" (success), "=&r" (v.lo), "=&r" (v.hi)
        : "r" (p)
    );
    return v;
}

static inline void atomic_store_explicit(volatile atomic_word2* p, atomic_word2 v, memory_order_relaxed_t)
{
    atomic_word lo;
    atomic_word hi;
    atomic_word success;
    __asm__ __volatile__
    (
        "0:\n\t"
        "ldxp\t%2, %3, [%6]\n\t"
        "stxp\t%w0, %4, %5, [%6]\n\t"
        "cbnz\t%w0, 0b\n\t"
        : "=&r" (success), "=m" (*p), "=&r" (lo), "=&r" (hi)
        : "r" (v.lo), "r" (v.hi), "r" (p)
        : "memory"
    );
}

static inline void atomic_store_explicit(volatile atomic_word2* p, atomic_word2 v, memory_order_release_t)
{
    atomic_word lo;
    atomic_word hi;
    atomic_word success;
    __asm__ __volatile__
    (
        "0:\n\t"
        "ldxp\t%2, %3, [%6]\n\t"
        "stlxp\t%w0, %4, %5, [%6]\n\t"
        "cbnz\t%w0, 0b\n\t"
        : "=&r" (success), "=m" (*p), "=&r" (lo), "=&r" (hi)
        : "r" (v.lo), "r" (v.hi), "r" (p)
        : "memory"
    );
}

static inline atomic_word2 atomic_exchange_explicit(volatile atomic_word2* p, atomic_word2 val, memory_order_acq_rel_t)
{
    atomic_word2 oldval;
    atomic_word success;
    __asm__ __volatile__
    (
        "0:\n\t"
        "ldaxp\t%2, %3, [%6]\n\t"
        "stlxp\t%w0, %5, %4, [%6]\n\t"
        "cbnz\t%w0, 0b\n\t"
        : "=&r" (success), "+m" (*p), "=&r" (oldval.lo), "=&r" (oldval.hi)
        : "r" (val.hi), "r" (val.lo), "r" (p)
        : "memory"
    );
    return oldval;
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, memory_order_acquire_t, memory_order_relaxed_t)
{
    atomic_word lo = oldval->lo;
    atomic_word hi = oldval->hi;
    atomic_word success;
    __asm__ __volatile__
    (
        "0:\n\t"
        "ldaxp\t%2, %3, [%8]\n\t"
        "cmp\t%3, %5\n\t"
        "b.ne\t1f\n\t"
        "cmp\t%2, %4\n\t"
        "b.ne\t1f\n\t"
        "stxp\t%w0, %6, %7, [%8]\n\t"
        "cbnz\t%w0, 0b\n\t"
        "1:\n\t"
        "clrex\n\t"
        : "=&r" (success), "+m" (*p), "=&r" (oldval->lo), "=&r" (oldval->hi)
        : "r" (lo), "r" (hi), "r" (newval.lo), "r" (newval.hi), "r" (p), "0" (1)
        : "cc", "memory"
    );
    return success == 0;
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, memory_order_release_t, memory_order_relaxed_t)
{
    atomic_word lo = oldval->lo;
    atomic_word hi = oldval->hi;
    atomic_word success;
    __asm__ __volatile__
    (
        "0:\n\t"
        "ldxp\t%2, %3, [%8]\n\t"
        "cmp\t%3, %5\n\t"
        "b.ne\t1f\n\t"
        "cmp\t%2, %4\n\t"
        "b.ne\t1f\n\t"
        "stlxp\t%w0, %6, %7, [%8]\n\t"
        "cbnz\t%w0, 0b\n\t"
        "1:\n\t"
        "clrex\n\t"
        : "=&r" (success), "+m" (*p), "=&r" (oldval->lo), "=&r" (oldval->hi)
        : "r" (lo), "r" (hi), "r" (newval.lo), "r" (newval.hi), "r" (p), "0" (1)
        : "cc", "memory"
    );
    return success == 0;
}

static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, int /*memory_order_acq_rel_t*/, memory_order_relaxed_t)
{
    atomic_word lo = oldval->lo;
    atomic_word hi = oldval->hi;
    atomic_word success;
    __asm__ __volatile__
    (
        "0:\n\t"
        "ldaxp\t%2, %3, [%8]\n\t"
        "cmp\t%3, %5\n\t"
        "b.ne\t1f\n\t"
        "cmp\t%2, %4\n\t"
        "b.ne\t1f\n\t"
        "stlxp\t%w0, %6, %7, [%8]\n\t"
        "cbnz\t%w0, 0b\n\t"
        "1:\n\t"
        "clrex\n\t"
        : "=&r" (success), "+m" (*p), "=&r" (oldval->lo), "=&r" (oldval->hi)
        : "r" (lo), "r" (hi), "r" (newval.lo), "r" (newval.hi), "r" (p), "0" (1)
        : "cc", "memory"
    );
    return success == 0;
}

template<class SuccOrder, class FailOrder>
static inline bool atomic_compare_exchange_weak_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, SuccOrder o1, FailOrder o2)
{
    // TODO: implement proper weak compare exchange
    return atomic_compare_exchange_strong_explicit(p, oldval, newval, o1, o2);
}
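
// Usage sketch (illustrative, not part of the original header): the double-word CAS is
// typically used for {pointer, version} pairs to defeat ABA, assuming atomic_word2 exposes
// lo/hi as above and head/next_node are hypothetical caller-side names:
//
//   atomic_word2 oldv = atomic_load_explicit(&head, memory_order_acquire);
//   atomic_word2 newv;
//   do
//   {
//       newv.lo = (atomic_word) next_node;   // new pointer value
//       newv.hi = oldv.hi + 1;               // bump the version tag
//   }
//   while (!atomic_compare_exchange_strong_explicit(&head, &oldv, newv,
//                                                   memory_order_release, memory_order_relaxed));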