20
20
21
21
AWS_EXTERN_C_BEGIN
22
22
23
- #if !(defined(_M_IX86) || defined(_M_X64))
24
- # error Atomics are not currently supported for non-x86 MSVC platforms
23
+ #if !(defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) )
24
+ # error Atomics are not currently supported for non-x86 or ARM64 MSVC platforms
25
25
26
26
/*
27
27
* In particular, it's not clear that seq_cst will work properly on non-x86
@@ -63,6 +63,24 @@ AWS_EXTERN_C_BEGIN
63
63
* this use case.
64
64
*/
65
65
66
+ /* *
67
+ * Some general notes about ARM environments:
68
+ * ARM processors use a weak memory model, as opposed to the strong memory model used by Intel processors.
69
+ * This means more permissive memory ordering is allowed between stores and loads.
70
+ *
71
+ * Thus the ARM port needs more hardware fences/barriers to ensure developer intent.
72
+ * Memory barriers prevent stores and loads from being reordered across them, depending on their type
73
+ * (read write, write only, read only ...)
74
+ *
75
+ * For more information about ARM64 memory ordering,
76
+ * see https://developer.arm.com/documentation/102336/0100/Memory-ordering
77
+ * For more information about Memory barriers,
78
+ * see https://developer.arm.com/documentation/102336/0100/Memory-barriers
79
+ * For more information about the Microsoft ARM64 intrinsic APIs,
80
+ * see https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-170
81
+ * Note: the _Interlocked[Op]64 intrinsics are the same for ARM64 and x64 processors
82
+ */
83
+
66
84
#ifdef _M_IX86
67
85
# define AWS_INTERLOCKED_INT (x ) _Interlocked##x
68
86
typedef long aws_atomic_impl_int_t ;
@@ -71,6 +89,32 @@ typedef long aws_atomic_impl_int_t;
71
89
typedef long long aws_atomic_impl_int_t ;
72
90
#endif
73
91
92
+ #ifdef _M_ARM64
93
+ /* Hardware read/write barrier: prevents all memory operations from crossing the barrier in either direction */
94
+ # define AWS_RW_BARRIER () __dmb(_ARM64_BARRIER_SY)
95
+ /* Hardware read barrier: prevents all memory operations from crossing the barrier upwards */
96
+ # define AWS_R_BARRIER () __dmb(_ARM64_BARRIER_LD)
97
+ /* Hardware write barrier: prevents all memory operations from crossing the barrier downwards */
98
+ # define AWS_W_BARRIER () __dmb(_ARM64_BARRIER_ST)
99
+ /* Software barrier: prevents the compiler from reordering operations across the barrier */
100
+ # define AWS_SW_BARRIER () _ReadWriteBarrier();
101
+ #else
102
+ /* hardware barriers, do nothing on x86 since it has a strong memory model
103
+ * as described in the section above: some general notes
104
+ */
105
+ # define AWS_RW_BARRIER ()
106
+ # define AWS_R_BARRIER ()
107
+ # define AWS_W_BARRIER ()
108
+ /*
109
+ * x86: only a compiler barrier is required. For seq_cst, we must use some form of interlocked operation for
110
+ * writes, but that's the caller's responsibility.
111
+ *
112
+ * Volatile ops may or may not imply this barrier, depending on the /volatile: switch, but adding an extra
113
+ * barrier doesn't hurt.
114
+ */
115
+ # define AWS_SW_BARRIER () _ReadWriteBarrier(); /* software barrier */
116
+ #endif
117
+
74
118
static inline void aws_atomic_priv_check_order (enum aws_memory_order order) {
75
119
#ifndef NDEBUG
76
120
switch (order) {
@@ -107,14 +151,8 @@ static inline void aws_atomic_priv_barrier_before(enum aws_memory_order order, e
107
151
return ;
108
152
}
109
153
110
- /*
111
- * x86: only a compiler barrier is required. For seq_cst, we must use some form of interlocked operation for
112
- * writes, but that's the caller's responsibility.
113
- *
114
- * Volatile ops may or may not imply this barrier, depending on the /volatile: switch, but adding an extra
115
- * barrier doesn't hurt.
116
- */
117
- _ReadWriteBarrier ();
154
+ AWS_RW_BARRIER ();
155
+ AWS_SW_BARRIER ();
118
156
}
119
157
120
158
static inline void aws_atomic_priv_barrier_after (enum aws_memory_order order, enum aws_atomic_mode_priv mode) {
@@ -131,11 +169,8 @@ static inline void aws_atomic_priv_barrier_after(enum aws_memory_order order, en
131
169
return ;
132
170
}
133
171
134
- /*
135
- * x86: only a compiler barrier is required. For seq_cst, we must use some form of interlocked operation for
136
- * writes, but that's the caller's responsibility.
137
- */
138
- _ReadWriteBarrier ();
172
+ AWS_RW_BARRIER ();
173
+ AWS_SW_BARRIER ();
139
174
}
140
175
141
176
/* *
@@ -344,16 +379,29 @@ void aws_atomic_thread_fence(enum aws_memory_order order) {
344
379
AWS_INTERLOCKED_INT (Exchange)(&x, 1 );
345
380
break ;
346
381
case aws_memory_order_release:
382
+ AWS_W_BARRIER ();
383
+ AWS_SW_BARRIER ();
384
+ break ;
347
385
case aws_memory_order_acquire:
386
+ AWS_R_BARRIER ();
387
+ AWS_SW_BARRIER ();
388
+ break ;
348
389
case aws_memory_order_acq_rel:
349
- _ReadWriteBarrier ();
390
+ AWS_RW_BARRIER ();
391
+ AWS_SW_BARRIER ();
350
392
break ;
351
393
case aws_memory_order_relaxed:
352
394
/* no-op */
353
395
break ;
354
396
}
355
397
}
356
398
399
+ /* prevent conflicts with other files that might pick the same names */
400
+ #undef AWS_RW_BARRIER
401
+ #undef AWS_R_BARRIER
402
+ #undef AWS_W_BARRIER
403
+ #undef AWS_SW_BARRIER
404
+
357
405
#define AWS_ATOMICS_HAVE_THREAD_FENCE
358
406
AWS_EXTERN_C_END
359
407
#endif
0 commit comments