Skip to content

Commit 117af79

Browse files
committed
Actually fix the patch
1 parent 51afc7a commit 117af79

File tree

4 files changed

+32
-3
lines changed

4 files changed

+32
-3
lines changed

easybuild/easyconfigs/p/PyTorch/PyTorch-1.9.0-foss-2020b.eb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ checksums = [
4646
'7a6e512274f0b8673f4f207a5bc53387d88be7e79833f42d20365668b2118071',
4747
# PyTorch-1.9.0_avoid-failures-in-test_unary_ufuncs.patch
4848
'f600e6831f8a03af007845687d1e0f65b2394ca89a9dab5178e2cdc9bd384d43',
49-
'd1f85b2f0f03b500a61b2456d6ec63fccb62f7edc350a820553e2891ec23bf13', # PyTorch-1.9.0_fix-vsx-vector-functions.patch
49+
'a4733b6b16a0db4ee5f85f2b103abc29bd711cfc5253f8dd8494d2b0c1509516', # PyTorch-1.9.0_fix-vsx-vector-functions.patch
5050
# PyTorch-1.9.0_skip-lstm-serialization-test.patch
5151
'0fc14e29bd7530bcc09f4212df3c846072b1313216da86b827e102b85d695f49',
5252
]

easybuild/easyconfigs/p/PyTorch/PyTorch-1.9.0-fosscuda-2020b-imkl.eb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ checksums = [
6464
'8e8b417782e2f3004462c32338e12685e7296d15207f3e3087dcb8015e648f98', # PyTorch-1.9.0_fix-testnn-on-A100.patch
6565
# PyTorch-1.9.0_fix-use-after-destruct-in-cudaipctypes.patch
6666
'67960bf9140baf004b07e29f7c2b338e7bc4e4e4f2c931768be44f58526e605f',
67-
'd1f85b2f0f03b500a61b2456d6ec63fccb62f7edc350a820553e2891ec23bf13', # PyTorch-1.9.0_fix-vsx-vector-functions.patch
67+
'a4733b6b16a0db4ee5f85f2b103abc29bd711cfc5253f8dd8494d2b0c1509516', # PyTorch-1.9.0_fix-vsx-vector-functions.patch
6868
# PyTorch-1.9.0_increase-test-cuda-tolerance.patch
6969
'73de855ab1ed38043c7fb2a983927786b83d7547aefed926f19e554e2214838a',
7070
# PyTorch-1.9.0_increase-tolerance-for-distributed-tests.patch

easybuild/easyconfigs/p/PyTorch/PyTorch-1.9.0-fosscuda-2020b.eb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ checksums = [
6363
'8e8b417782e2f3004462c32338e12685e7296d15207f3e3087dcb8015e648f98', # PyTorch-1.9.0_fix-testnn-on-A100.patch
6464
# PyTorch-1.9.0_fix-use-after-destruct-in-cudaipctypes.patch
6565
'67960bf9140baf004b07e29f7c2b338e7bc4e4e4f2c931768be44f58526e605f',
66-
'd1f85b2f0f03b500a61b2456d6ec63fccb62f7edc350a820553e2891ec23bf13', # PyTorch-1.9.0_fix-vsx-vector-functions.patch
66+
'a4733b6b16a0db4ee5f85f2b103abc29bd711cfc5253f8dd8494d2b0c1509516', # PyTorch-1.9.0_fix-vsx-vector-functions.patch
6767
# PyTorch-1.9.0_increase-test-cuda-tolerance.patch
6868
'73de855ab1ed38043c7fb2a983927786b83d7547aefed926f19e554e2214838a',
6969
# PyTorch-1.9.0_increase-tolerance-for-distributed-tests.patch

easybuild/easyconfigs/p/PyTorch/PyTorch-1.9.0_fix-vsx-vector-functions.patch

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -160,6 +160,35 @@ index 2a1a87aa72..5bcf818232 100644
160160
}
161161
Vec256<float> C10_ALWAYS_INLINE ceil() const {
162162
return {vec_ceil(_vec0), vec_ceil(_vec1)};
163+
@@ -593,27 +593,7 @@ class Vec256<float> {
164+
}
165+
166+
Vec256<float> C10_ALWAYS_INLINE pow(const Vec256<float>& exp) const {
167+
- auto x = *this;
168+
- auto sign_bit = (*this) & sign_mask;
169+
- // |b|
170+
- auto exp_abs = exp.abs();
171+
- auto exp_trunc = exp.trunc();
172+
- Vec256<float> odd_mask;
173+
- odd_mask._vecb0 = (vec_signed(exp._vec0) & vi_1) != vi_0;
174+
- odd_mask._vecb1 = (vec_signed(exp._vec1) & vi_1) != vi_0;
175+
- // using ln fuction
176+
- auto temp = (abs().log() * exp).exp();
177+
-
178+
- // is odd or even check from Sleef
179+
- auto is_int = (exp == exp_trunc) | (exp_abs >= vcheck);
180+
- auto is_odd = odd_mask & is_int & (exp_abs < vcheck);
181+
- // if even then then pow result should be absolute
182+
- auto temp_sign = temp | sign_bit; // copy_sign
183+
- auto out = blendv(temp, temp_sign, is_odd);
184+
- // x<0 and y != N, then NAN
185+
- auto out1 = blendv(out, v_nan, ((exp.floor() != exp) & (x < zero)));
186+
- // y = 0 then 1
187+
- return blendv(out1, one, (exp_abs == zero));
188+
+ return {Sleef_powf4_u10vsx(_vec0, exp._vec0), Sleef_powf4_u10vsx(_vec1, exp._vec1)};
189+
}
190+
191+
Vec256<float> fmod(const Vec256<float>& b) const {
163192
@@ -653,8 +616,8 @@ class Vec256<float> {
164193
DEFINE_MEMBER_OP(operator-, float, vec_sub)
165194
DEFINE_MEMBER_OP(operator*, float, vec_mul)

0 commit comments

Comments
 (0)