Skip to content

Commit 2fd6652

Browse files
authored
Add support for WIN ARM64 (#1092)
1 parent 71c6ab7 commit 2fd6652

File tree

1 file changed

+64
-16
lines changed

1 file changed

+64
-16
lines changed

include/aws/common/atomics_msvc.inl

Lines changed: 64 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020

2121
AWS_EXTERN_C_BEGIN
2222

23-
#if !(defined(_M_IX86) || defined(_M_X64))
24-
# error Atomics are not currently supported for non-x86 MSVC platforms
23+
#if !(defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64))
24+
# error Atomics are not currently supported for non-x86 or ARM64 MSVC platforms
2525

2626
/*
2727
* In particular, it's not clear that seq_cst will work properly on non-x86
@@ -63,6 +63,24 @@ AWS_EXTERN_C_BEGIN
6363
* this use case.
6464
*/
6565

66+
/**
67+
* Some general notes about ARM environments:
68+
* ARM processors use a weak memory model, as opposed to the strong memory model used by Intel processors.
69+
* This means more permissive reordering is allowed between stores and loads.
70+
*
71+
* Thus the ARM port needs more hardware fences/barriers to ensure developer intent.
72+
* Memory barriers prevent stores and loads from being reordered across them, depending on their type
73+
* (read write, write only, read only ...)
74+
*
75+
* For more information about ARM64 memory ordering,
76+
* see https://developer.arm.com/documentation/102336/0100/Memory-ordering
77+
* For more information about Memory barriers,
78+
* see https://developer.arm.com/documentation/102336/0100/Memory-barriers
79+
* For more information about the Microsoft ARM64 intrinsic APIs,
80+
* see https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-170
81+
* Note: the _Interlocked[Op]64 intrinsics are the same for ARM64 and x64 processors
82+
*/
83+
6684
#ifdef _M_IX86
6785
# define AWS_INTERLOCKED_INT(x) _Interlocked##x
6886
typedef long aws_atomic_impl_int_t;
@@ -71,6 +89,32 @@ typedef long aws_atomic_impl_int_t;
7189
typedef long long aws_atomic_impl_int_t;
7290
#endif
7391

92+
#ifdef _M_ARM64
93+
/* Hardware Read Write barrier, prevents all memory operations from crossing the barrier in either direction */
94+
# define AWS_RW_BARRIER() __dmb(_ARM64_BARRIER_SY)
95+
/* Hardware Read barrier, prevents all memory operations from crossing the barrier upwards */
96+
# define AWS_R_BARRIER() __dmb(_ARM64_BARRIER_LD)
97+
/* Hardware Write barrier, prevents all memory operations from crossing the barrier downwards */
98+
# define AWS_W_BARRIER() __dmb(_ARM64_BARRIER_ST)
99+
/* Software barrier, prevents the compiler from reordering the operations across the barrier */
100+
# define AWS_SW_BARRIER() _ReadWriteBarrier();
101+
#else
102+
/* hardware barriers, do nothing on x86 since it has a strong memory model
103+
* as described in the section above: some general notes
104+
*/
105+
# define AWS_RW_BARRIER()
106+
# define AWS_R_BARRIER()
107+
# define AWS_W_BARRIER()
108+
/*
109+
* x86: only a compiler barrier is required. For seq_cst, we must use some form of interlocked operation for
110+
* writes, but that's the caller's responsibility.
111+
*
112+
* Volatile ops may or may not imply this barrier, depending on the /volatile: switch, but adding an extra
113+
* barrier doesn't hurt.
114+
*/
115+
# define AWS_SW_BARRIER() _ReadWriteBarrier(); /* software barrier */
116+
#endif
117+
74118
static inline void aws_atomic_priv_check_order(enum aws_memory_order order) {
75119
#ifndef NDEBUG
76120
switch (order) {
@@ -107,14 +151,8 @@ static inline void aws_atomic_priv_barrier_before(enum aws_memory_order order, e
107151
return;
108152
}
109153

110-
/*
111-
* x86: only a compiler barrier is required. For seq_cst, we must use some form of interlocked operation for
112-
* writes, but that's the caller's responsibility.
113-
*
114-
* Volatile ops may or may not imply this barrier, depending on the /volatile: switch, but adding an extra
115-
* barrier doesn't hurt.
116-
*/
117-
_ReadWriteBarrier();
154+
AWS_RW_BARRIER();
155+
AWS_SW_BARRIER();
118156
}
119157

120158
static inline void aws_atomic_priv_barrier_after(enum aws_memory_order order, enum aws_atomic_mode_priv mode) {
@@ -131,11 +169,8 @@ static inline void aws_atomic_priv_barrier_after(enum aws_memory_order order, en
131169
return;
132170
}
133171

134-
/*
135-
* x86: only a compiler barrier is required. For seq_cst, we must use some form of interlocked operation for
136-
* writes, but that's the caller's responsibility.
137-
*/
138-
_ReadWriteBarrier();
172+
AWS_RW_BARRIER();
173+
AWS_SW_BARRIER();
139174
}
140175

141176
/**
@@ -344,16 +379,29 @@ void aws_atomic_thread_fence(enum aws_memory_order order) {
344379
AWS_INTERLOCKED_INT(Exchange)(&x, 1);
345380
break;
346381
case aws_memory_order_release:
382+
AWS_W_BARRIER();
383+
AWS_SW_BARRIER();
384+
break;
347385
case aws_memory_order_acquire:
386+
AWS_R_BARRIER();
387+
AWS_SW_BARRIER();
388+
break;
348389
case aws_memory_order_acq_rel:
349-
_ReadWriteBarrier();
390+
AWS_RW_BARRIER();
391+
AWS_SW_BARRIER();
350392
break;
351393
case aws_memory_order_relaxed:
352394
/* no-op */
353395
break;
354396
}
355397
}
356398

399+
/* prevent conflicts with other files that might pick the same names */
400+
#undef AWS_RW_BARRIER
401+
#undef AWS_R_BARRIER
402+
#undef AWS_W_BARRIER
403+
#undef AWS_SW_BARRIER
404+
357405
#define AWS_ATOMICS_HAVE_THREAD_FENCE
358406
AWS_EXTERN_C_END
359407
#endif

0 commit comments

Comments
 (0)