Skip to content

Commit fe13556

Browse files
grey-eminence authored and igcbot committed
IMF LA open-sourcing. FP64 expm1. (#23)
1 parent 5223f7d commit fe13556

File tree

2 files changed

+297
-6
lines changed

2 files changed

+297
-6
lines changed
Lines changed: 293 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,293 @@
1+
/*===================== begin_copyright_notice ==================================
2+
3+
Copyright (c) 2017 Intel Corporation
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a
6+
copy of this software and associated documentation files (the
7+
"Software"), to deal in the Software without restriction, including
8+
without limitation the rights to use, copy, modify, merge, publish,
9+
distribute, sublicense, and/or sell copies of the Software, and to
10+
permit persons to whom the Software is furnished to do so, subject to
11+
the following conditions:
12+
13+
The above copyright notice and this permission notice shall be included
14+
in all copies or substantial portions of the Software.
15+
16+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17+
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23+
24+
25+
======================= end_copyright_notice ==================================*/
26+
27+
#include "../imf.h"
28+
#pragma OPENCL FP_CONTRACT OFF
29+
/*
 * Layout of the constant block instantiated below as
 * __internal_dexpm1_la_data_avx512.
 *
 * Every member is a raw `unsigned long` word (64-bit in OpenCL C).
 * NOTE(review): the words look like IEEE-754 binary64 bit patterns and
 * masks that a consumer would reinterpret as doubles -- confirm against
 * the consumer, which is not part of the code visible in this file.
 *
 * The initializer is positional, so the member order here must stay in
 * sync with the order of values in the initializer.
 */
typedef struct
30+
{
31+
unsigned long Exp_tbl_H[16];
32+
unsigned long Exp_tbl_L[16];
33+
unsigned long L2E;
34+
unsigned long Shifter;
35+
unsigned long Threshold;
36+
unsigned long SgnMask;
37+
unsigned long L2H;
38+
unsigned long L2L;
39+
unsigned long ZThres;
40+
unsigned long EMask;
41+
unsigned long poly_coeff8;
42+
unsigned long poly_coeff7;
43+
unsigned long poly_coeff6;
44+
unsigned long poly_coeff5;
45+
unsigned long poly_coeff4;
46+
unsigned long poly_coeff3;
47+
unsigned long poly_coeff2;
48+
unsigned long One;
49+
unsigned long MOne;
50+
} __internal_dexpm1_la_data_avx512_t;
51+
/*
 * Constant data laid out as __internal_dexpm1_la_data_avx512_t.
 *
 * Positional order of the initializer:
 *   1st brace group : Exp_tbl_H[16]
 *   2nd brace group : Exp_tbl_L[16]
 *   then scalars    : L2E, Shifter, Threshold, SgnMask, L2H, L2L,
 *                     ZThres, EMask, poly_coeff8 .. poly_coeff2, One, MOne
 *
 * NOTE(review): read as IEEE-754 doubles, Exp_tbl_H looks like 2^(j/16)
 * for j = 0..15 with Exp_tbl_L as low-order corrections, and One / MOne
 * are the bit patterns of 1.0 / -1.0 -- confirm against the consumer.
 * This object is not referenced by the scalar kernel visible in this file.
 */
static __constant __internal_dexpm1_la_data_avx512_t __internal_dexpm1_la_data_avx512 = {
52+
{
53+
0x3ff0000000000000uL, 0x3ff0b5586cf9890fuL, 0x3ff172b83c7d517buL, 0x3ff2387a6e756238uL, 0x3ff306fe0a31b715uL, 0x3ff3dea64c123422uL,
54+
0x3ff4bfdad5362a27uL, 0x3ff5ab07dd485429uL, 0x3ff6a09e667f3bcduL, 0x3ff7a11473eb0187uL, 0x3ff8ace5422aa0dbuL, 0x3ff9c49182a3f090uL,
55+
0x3ffae89f995ad3aduL, 0x3ffc199bdd85529cuL, 0x3ffd5818dcfba487uL, 0x3ffea4afa2a490dauL}
56+
, {
57+
0x0000000000000000uL, 0x3c979aa65d837b6duL, 0xbc801b15eaa59348uL, 0x3c968efde3a8a894uL, 0x3c834d754db0abb6uL, 0x3c859f48a72a4c6duL,
58+
0x3c7690cebb7aafb0uL, 0x3c9063e1e21c5409uL, 0xbc93b3efbf5e2228uL, 0xbc7b32dcb94da51duL, 0x3c8db72fc1f0eab4uL, 0x3c71affc2b91ce27uL,
59+
0x3c8c1a7792cb3387uL, 0x3c736eae30af0cb3uL, 0x3c74a385a63d07a7uL, 0xbc8ff7128fd391f0uL}
60+
61+
, 0x3ff71547652B82FEuL, 0x42f8000000003ff0uL, 0x40861DA04CBAFE44uL, 0x8000000000000000uL, 0x3fe62e42fefa39efuL, 0x3c7abc9e3b39803fuL,
62+
0xc060000000000000uL, 0xbfffffffffffffffuL, 0x3efa01f8f4be0bb2uL, 0x3f2a020410303d8auL, 0x3f56c1c38e164a2fuL, 0x3f81111110865214uL,
63+
0x3fa5555554ad3d06uL, 0x3fc5555555555656uL, 0x3fe00000000000a2uL, 0x3ff0000000000000uL, 0xbff0000000000000uL
64+
};
65+
66+
/*
 * Layout of the constant block instantiated below as
 * __internal_dexpm1_la_data.
 *
 * Expm1_HA_table holds (1 << 8) = 256 raw 64-bit words and poly_coeff
 * holds 4; the remaining members are single 64-bit words (constants and
 * bit masks, judging by their names).
 * NOTE(review): how each word is interpreted depends on the consumer,
 * which is not part of the code visible in this file.
 *
 * The initializer is positional, so the member order here must stay in
 * sync with the order of values in the initializer.
 */
typedef struct
67+
{
68+
69+
unsigned long Expm1_HA_table[(1 << 8)];
70+
71+
unsigned long poly_coeff[4];
72+
unsigned long Log2e;
73+
unsigned long L2H;
74+
unsigned long L2L;
75+
unsigned long ExpAddConst;
76+
unsigned long IndexMask;
77+
unsigned long ExpMask;
78+
unsigned long HalfMask;
79+
unsigned long MOne;
80+
unsigned long AbsMask;
81+
unsigned long Threshold;
82+
unsigned long L2;
83+
unsigned long ExpAddConst2;
84+
unsigned long IndexMask2;
85+
unsigned long ExpMask2;
86+
} __internal_dexpm1_la_data_t;
87+
/*
 * Constant data laid out as __internal_dexpm1_la_data_t.
 *
 * Positional order of the initializer:
 *   1st brace group : Expm1_HA_table[256]
 *   2nd brace group : poly_coeff[4]
 *   then scalars    : Log2e, L2H, L2L, ExpAddConst, IndexMask, ExpMask,
 *                     HalfMask, MOne, AbsMask, Threshold, L2,
 *                     ExpAddConst2, IndexMask2, ExpMask2
 *
 * NOTE(review): the table entries appear to come in (leading part,
 * low-order correction) pairs, i.e. 128 pairs -- confirm against the
 * consumer. This object is not referenced by the scalar kernel visible
 * in this file.
 */
static __constant __internal_dexpm1_la_data_t __internal_dexpm1_la_data = {
88+
89+
{
90+
0x0000000000000000uL, 0x0000000000000000uL, 0x0000163da8000000uL, 0x3e3fb33356d84a67uL, 0x00002c9a40000000uL, 0xbe3887f9f1190835uL,
91+
0x00004315e8000000uL, 0x3e1b9fe12f5ce3e7uL, 0x000059b0d0000000uL, 0x3e48ac2ba1d73e2auL, 0x0000706b28000000uL, 0x3e3ddf6ddc6dc404uL,
92+
0x0000874518000000uL, 0x3e1d66f20230d7c9uL, 0x00009e3ec8000000uL, 0x3e46379c1a290f03uL, 0x0000b55870000000uL, 0xbe4833b784eb3a37uL,
93+
0x0000cc9228000000uL, 0x3e4b923fba03db83uL, 0x0000e3ec30000000uL, 0x3e469e8d10103a17uL, 0x0000fb66b0000000uL, 0xbdb2ce50dcdf6e22uL,
94+
0x00011301d0000000uL, 0x3df25b50a4ebbf1buL, 0x00012abdc0000000uL, 0x3e1b0c72fee4aeb5uL, 0x0001429ab0000000uL, 0xbe356d2204cbefe7uL,
95+
0x00015a98c8000000uL, 0x3e24b1ca24901aaeuL, 0x000172b840000000uL, 0xbe4c15742919041cuL, 0x00018af938000000uL, 0x3e2191bd3777ee17uL,
96+
0x0001a35be8000000uL, 0x3e4b7e5ba9e5b4c8uL, 0x0001bbe088000000uL, 0xbe4fdd19632a70c7uL, 0x0001d48730000000uL, 0x3e368b9aa7805b80uL,
97+
0x0001ed5020000000uL, 0x3e47e6c8e5c40d00uL, 0x0002063b88000000uL, 0x3e18a3358ee3bac1uL, 0x00021f4990000000uL, 0x3e37ddc962552fd3uL,
98+
0x0002387a70000000uL, 0xbe38a9dc7993e052uL, 0x000251ce50000000uL, 0xbe135670329f5521uL, 0x00026b4568000000uL, 0xbe40ec1916d42cc6uL,
99+
0x000284dfe0000000uL, 0x3e3f5638096cf15duL, 0x00029e9df8000000uL, 0xbe470108f69ed175uL, 0x0002b87fd0000000uL, 0x3e2b5b31ffbbd48duL,
100+
0x0002d285a8000000uL, 0xbe31bfcf4bff6e2buL, 0x0002ecafa8000000uL, 0x3e33e2f5611ca0f4uL, 0x000306fe08000000uL, 0x3e418db8a96f46aduL,
101+
0x0003217100000000uL, 0xbe4d993e76563187uL, 0x00033c08b0000000uL, 0x3e4320b7fa64e431uL, 0x000356c560000000uL, 0xbe1b5803cdae772euL,
102+
0x000371a738000000uL, 0xbe28aac6ab1d7560uL, 0x00038cae70000000uL, 0xbe47d13cd3d2b1a8uL, 0x0003a7db38000000uL, 0xbe48d30048af21b7uL,
103+
0x0003c32dc0000000uL, 0x3e489d47242000f9uL, 0x0003dea650000000uL, 0xbe4f6e5eee525f6fuL, 0x0003fa4508000000uL, 0xbe4a9bff22fa047fuL,
104+
0x0004160a20000000uL, 0x3e3f72e29f84325cuL, 0x000431f5d8000000uL, 0x3e350a896dc70444uL, 0x00044e0860000000uL, 0x3e18624b40c4dbd0uL,
105+
0x00046a41f0000000uL, 0xbe4717fd446d7686uL, 0x000486a2b8000000uL, 0xbe41f6197f61f2e2uL, 0x0004a32af0000000uL, 0x3e2afa7bcce5b17auL,
106+
0x0004bfdad8000000uL, 0xbe464eaec715e343uL, 0x0004dcb298000000uL, 0x3e3fddd0d63b36efuL, 0x0004f9b278000000uL, 0xbe362d35952cc275uL,
107+
0x000516daa0000000uL, 0x3e467b320e0897a9uL, 0x0005342b58000000uL, 0xbe362b07e20f57c4uL, 0x000551a4c8000000uL, 0x3e42ec9076297631uL,
108+
0x00056f4738000000uL, 0xbe34ad8259913500uL, 0x00058d12d8000000uL, 0xbe4b41c016d6a1eauL, 0x0005ab07e0000000uL, 0xbe45bd5eb539b67fuL,
109+
0x0005c92688000000uL, 0x3e42ca35b80e258euL, 0x0005e76f18000000uL, 0xbe4296f5bc8b20dauL, 0x000605e1b8000000uL, 0x3e376dc08b076f59uL,
110+
0x0006247eb0000000uL, 0x3e0d2ac258f87d03uL, 0x0006434638000000uL, 0xbe4999e701c483c7uL, 0x0006623880000000uL, 0x3e42a91124893ecfuL,
111+
0x00068155d8000000uL, 0xbe4d9ab467bf1d47uL, 0x0006a09e68000000uL, 0xbe380c4336f74d05uL, 0x0006c01278000000uL, 0xbe47a12a08944ab3uL,
112+
0x0006dfb240000000uL, 0xbe4cd72e886ef8eauL, 0x0006ff7df8000000uL, 0x3e3519483cf87e1buL, 0x00071f75e8000000uL, 0x3e2d8bee7ba46e1euL,
113+
0x00073f9a48000000uL, 0x3e24b02e77ab934auL, 0x00075feb58000000uL, 0xbe3bd98374091656uL, 0x0007806950000000uL, 0xbe00d1604f328fecuL,
114+
0x0007a11470000000uL, 0x3e4f580c36bea881uL, 0x0007c1ed00000000uL, 0x3e330c1327c49334uL, 0x0007e2f338000000uL, 0xbe330b19defa2fd4uL,
115+
0x0008042758000000uL, 0xbe4e0f2f724f90ccuL, 0x0008258998000000uL, 0x3e34cce128acf88buL, 0x0008471a48000000uL, 0xbe3dc385331ad094uL,
116+
0x000868d998000000uL, 0x3e4a2497640720eduL, 0x00088ac7d8000000uL, 0x3e38a669966530bduL, 0x0008ace540000000uL, 0x3e415506dadd3e2buL,
117+
0x0008cf3218000000uL, 0xbe34abb7410d55e3uL, 0x0008f1ae98000000uL, 0x3e31577362b98274uL, 0x0009145b08000000uL, 0x3e4c8ffe2c4530dauL,
118+
0x00093737b0000000uL, 0x3e29b8bc9e8a0388uL, 0x00095a44c8000000uL, 0x3e4e4290774da41buL, 0x00097d82a0000000uL, 0xbe00d8d83a30b6f8uL,
119+
0x0009a0f170000000uL, 0x3e2940f737462137uL, 0x0009c49180000000uL, 0x3e451f8480e3e236uL, 0x0009e86318000000uL, 0x3e3e323231824ca8uL,
120+
0x000a0c6678000000uL, 0x3e4aef2b2594d6d4uL, 0x000a309bf0000000uL, 0xbe4dae966539f470uL, 0x000a5503b0000000uL, 0x3e41f12ae45a1225uL,
121+
0x000a799e10000000uL, 0x3e49859ac3796fd9uL, 0x000a9e6b58000000uL, 0xbe44301205e0a6deuL, 0x000ac36bc0000000uL, 0xbe0606431f9234cbuL,
122+
0x000ae89f98000000uL, 0x3e35ad3ad5e8734duL, 0x000b0e0728000000uL, 0x3e38db66590842aduL, 0x000b33a2b8000000uL, 0x3e13c57ebdaff43auL,
123+
0x000b597290000000uL, 0xbe40d536338e3bf7uL, 0x000b7f76f0000000uL, 0x3e47daf237553d84uL, 0x000ba5b030000000uL, 0x3e2420c930819679uL,
124+
0x000bcc1e90000000uL, 0x3e12f074891ee83duL, 0x000bf2c258000000uL, 0x3e4eb8f0442046b8uL, 0x000c199be0000000uL, 0xbe43d56b1eeef9a7uL,
125+
0x000c40ab60000000uL, 0xbd87c2c975903ef8uL, 0x000c67f130000000uL, 0xbe3a82eb4b5dec80uL, 0x000c8f6d98000000uL, 0xbe4fc8c257729a1euL,
126+
0x000cb720e0000000uL, 0xbe48837cb757e1a1uL, 0x000cdf0b58000000uL, 0xbe4511e031dd83b5uL, 0x000d072d48000000uL, 0x3e403c4bdc687918uL,
127+
0x000d2f8708000000uL, 0x3deb13e315bc2473uL, 0x000d5818e0000000uL, 0xbe4822dbc6d12fd3uL, 0x000d80e318000000uL, 0xbe3367c68447b063uL,
128+
0x000da9e600000000uL, 0x3e4ed9942b84600duL, 0x000dd321f0000000uL, 0x3e480da3025b4aefuL, 0x000dfc9730000000uL, 0x3e4bdcdaf5cb4656uL,
129+
0x000e264618000000uL, 0xbe4852f6baf6c4f0uL, 0x000e502ee8000000uL, 0xbe1d30027630bb40uL, 0x000e7a51f8000000uL, 0x3e4e3a641a5aa459uL,
130+
0x000ea4afa0000000uL, 0x3e452486cc2c7b9duL, 0x000ecf4830000000uL, 0xbe438cc07b927e77uL, 0x000efa1bf0000000uL, 0xbe39ea5d888e02deuL,
131+
0x000f252b38000000uL, 0xbe2288ad162f2d20uL, 0x000f507658000000uL, 0x3e4b722a033a7c26uL, 0x000f7bfdb0000000uL, 0xbe431a0f63b7625auL,
132+
0x000fa7c180000000uL, 0x3e39e90d82e90a7euL, 0x000fd3c228000000uL, 0x3e4c7b8f884badd2uL}
133+
, {
134+
0x3f81111168877F38uL, 0x3fa55555C2A9C0F3uL, 0x3fc555555555541DuL, 0x3fdFFFFFFFFFFE5CuL}
135+
136+
, 0x40671547652B82FEuL, 0x3f762e42fef80000uL, 0x3d41cf79abc9e3b4uL, 0x42f80000001ff800uL, 0x00000000000007f0uL, 0x00000000003ff800uL,
137+
0xfffffffff8000000uL, 0xbff0000000000000uL, 0x7fffffffffffffffuL, 0x40861DA04CBAFE43uL, 0x3f762e42fefa39efuL, 0x43080000000ffc00uL,
138+
0x00000000000003f8uL, 0x00000000001ffc00uL
139+
};
140+
141+
/*
 * Lookup table used by __internal_dexpm1_la_cout(): 32 rows of two
 * entries each (64 entries in total).
 *
 * The kernel computes index = (fS.w & 0x1f) << 1, so `index` selects the
 * first entry of a row and `index + 1` the second:
 *   - entry [index]     supplies both 32-bit halves of T; its w32[1] half
 *                       is XOR-ed with the shifted integer bits of fS
 *                       before use, so these words are not plain doubles
 *                       on their own;
 *   - entry [index + 1] supplies only w32[1] of the correction term Tlr
 *                       (the kernel zeroes Tlr.w32[0]).
 *
 * NOTE(review): the rows look like 2^(j/32), j = 0..31, with the row
 * index pre-XOR-ed into the top mantissa bits so that the kernel's XOR
 * cancels it -- confirm against the int_double layout in imf.h.
 */
static __constant int_double __dexpm1_la_Tbl_exp[] = {
142+
{0x0000000000000000ull}, {0x0000000000000000ull},
143+
{0x0000d9b0d3158574ull}, {0x3c8cd2523567f613ull},
144+
{0x0001b5586cf9890full}, {0x3c979aa65d837b6dull},
145+
{0x00009301d0125b51ull}, {0xbc9556522a2fbd0eull},
146+
{0x000372b83c7d517bull}, {0xbc801b15eaa59348ull},
147+
{0x000354873168b9aaull}, {0x3c9aecf73e3a2f60ull},
148+
{0x0001387a6e756238ull}, {0x3c968efde3a8a894ull},
149+
{0x00011e9df51fdee1ull}, {0x3c82f7e16d09ab31ull},
150+
{0x000706fe0a31b715ull}, {0x3c834d754db0abb6ull},
151+
{0x0007f1a7373aa9cbull}, {0xbc924aedcc4b5069ull},
152+
{0x0006dea64c123422ull}, {0x3c859f48a72a4c6dull},
153+
{0x0001ce086061892dull}, {0x3c4363ed60c2ac12ull},
154+
{0x0002bfdad5362a27ull}, {0x3c7690cebb7aafb0ull},
155+
{0x0003b42b569d4f82ull}, {0xbc78dec6bd0f3860ull},
156+
{0x0002ab07dd485429ull}, {0x3c9063e1e21c5409ull},
157+
{0x0001a47eb03a5585ull}, {0xbc8c33c53bef4da8ull},
158+
{0x000ea09e667f3bcdull}, {0xbc93b3efbf5e2229ull},
159+
{0x000f9f75e8ec5f74ull}, {0xbc781f647e5a3eceull},
160+
{0x000ea11473eb0187ull}, {0xbc7b32dcb94da51dull},
161+
{0x0001a589994cce13ull}, {0xbc9369b6f13b3734ull},
162+
{0x0002ace5422aa0dbull}, {0x3c8db72fc1f0eab5ull},
163+
{0x0003b737b0cdc5e5ull}, {0xbc5da9b88b6c1e29ull},
164+
{0x0002c49182a3f090ull}, {0x3c71affc2b91ce27ull},
165+
{0x0001d503b23e255dull}, {0xbc91bbd1d3bcbb15ull},
166+
{0x0006e89f995ad3adull}, {0x3c8c1a7792cb3386ull},
167+
{0x0007ff76f2fb5e47ull}, {0xbc68d6f438ad9334ull},
168+
{0x0001199bdd85529cull}, {0x3c736eae30af0cb3ull},
169+
{0x00013720dcef9069ull}, {0x3c676b2c6c921967ull},
170+
{0x00035818dcfba487ull}, {0x3c74a385a63d07a8ull},
171+
{0x00037c97337b9b5full}, {0xbc82d52107b43e20ull},
172+
{0x0001a4afa2a490daull}, {0xbc8ff7128fd391f0ull},
173+
{0x0000d0765b6e4540ull}, {0x3c8a64a931d185eeull},
174+
175+
};
176+
177+
/*
 * Polynomial coefficients, stored as raw 64-bit words and read through
 * the `.f` member by __internal_dexpm1_la_cout(). The kernel evaluates
 * them in Horner order dc5, dc4, dc3, dc2, dc1, so dc1 multiplies R^2 and
 * dc5 multiplies R^6.
 * NOTE(review): as IEEE-754 doubles these are close to 1/720, 1/120,
 * 1/24, 1/6 and exactly 0.5 -- assumes int_double overlays a double on
 * the integer word; confirm in imf.h.
 * __dexpm1_la_dc0 is not referenced by the code visible in this file.
 */
static __constant int_double __dexpm1_la_dc5 = { 0x3f56c17256147174UL };
178+
static __constant int_double __dexpm1_la_dc4 = { 0x3f811115c0928f3bUL };
179+
static __constant int_double __dexpm1_la_dc3 = { 0x3fa5555555547138UL };
180+
static __constant int_double __dexpm1_la_dc2 = { 0x3fc5555555547d38UL };
181+
static __constant int_double __dexpm1_la_dc1 = { 0x3fe0000000000000UL };
182+
static __constant int_double __dexpm1_la_dc0 = { 0x3c6712f33cb068a3UL };
183+
184+
/*
 * Single-precision constants for the argument reduction: the kernel
 * computes fma(x, fL2E, fShifter) and then subtracts fShifter again.
 * NOTE(review): 0x3FB8AA3B is the float nearest log2(e); the shifter's
 * magnitude appears chosen so the sum keeps 5 fractional bits in its low
 * mantissa bits, which the kernel reads as the table index -- confirm.
 */
static __constant int_float __dexpm1_la_fL2E = { 0x3FB8AA3Bu };
185+
static __constant int_float __dexpm1_la_fShifter = { 0x48c07fe0u };
186+
187+
/*
 * Two-part multiplier applied to N in the reduction
 * R = fma(N, NL2L, fma(N, NL2H, x)).
 * NOTE(review): the bit patterns correspond to the high and low parts of
 * -ln(2) -- confirm.
 */
static __constant int_double __dexpm1_la_p_NL2H = { 0xbfe62e42fefa39efUL };
188+
189+
static __constant int_double __dexpm1_la_p_NL2L = { 0xbc7abc9e3b39803fUL };
190+
191+
__attribute__((always_inline))
192+
inline int __internal_dexpm1_la_cout (double *pxin, double *pres)
193+
{
194+
int nRet = 0;
195+
double xin = *pxin;
196+
int_double x, T, Tlr, sc, xa, res;
197+
double dN, R, poly, Th, poly_t;
198+
int_float x0f, fN, fR, fS;
199+
int index;
200+
201+
x0f.f = (float) xin;
202+
203+
fS.f = __builtin_spirv_OpenCL_fma_f32_f32_f32 (x0f.f, __dexpm1_la_fL2E.f, __dexpm1_la_fShifter.f);
204+
205+
fN.f = fS.f - __dexpm1_la_fShifter.f;
206+
207+
dN = (double) fN.f;
208+
R = __builtin_spirv_OpenCL_fma_f64_f64_f64 (dN, __dexpm1_la_p_NL2H.f, xin);
209+
R = __builtin_spirv_OpenCL_fma_f64_f64_f64 (dN, __dexpm1_la_p_NL2L.f, R);
210+
211+
index = (fS.w & 0x1f) << 1;
212+
213+
T.w32[1] = (fS.w << (20 - 5)) ^ __dexpm1_la_Tbl_exp[index].w32[1];
214+
T.w32[0] = __dexpm1_la_Tbl_exp[index].w32[0];
215+
216+
Tlr.w32[1] = __dexpm1_la_Tbl_exp[index + 1].w32[1];
217+
Tlr.w32[0] = 0;
218+
219+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (__dexpm1_la_dc5.f, R, __dexpm1_la_dc4.f);
220+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dexpm1_la_dc3.f);
221+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dexpm1_la_dc2.f);
222+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dexpm1_la_dc1.f);
223+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, 1.0);
224+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, Tlr.f);
225+
226+
Th = T.f - 1.0;
227+
228+
res.f = __builtin_spirv_OpenCL_fma_f64_f64_f64 (T.f, poly, Th);
229+
230+
if (__builtin_spirv_OpenCL_fabs_f32 (x0f.f) <= 708.0f)
231+
{
232+
*pres = res.f;
233+
return nRet;
234+
}
235+
236+
if (x0f.f < 0)
237+
{
238+
*pres = -1.0;
239+
return nRet;
240+
}
241+
242+
if (!(x0f.f < 1024.0f))
243+
{
244+
245+
x.f = xin;
246+
xa.w = x.w & 0x7fffffffffffffffUL;
247+
if (xa.w > 0x7ff0000000000000UL)
248+
{
249+
*pres = x.f + res.f;
250+
return nRet;
251+
}
252+
253+
res.w = (res.w & 0x0007ffffffffffffUL) | 0x7fd0000000000000UL;
254+
res.f = res.f * xin;
255+
nRet = 3;
256+
{
257+
*pres = res.f;
258+
return nRet;
259+
}
260+
}
261+
262+
T.w32[1] = ((fS.w - 512 * 32) << (20 - 5)) ^ __dexpm1_la_Tbl_exp[index].w32[1];
263+
T.w32[0] = __dexpm1_la_Tbl_exp[index].w32[0];
264+
265+
res.f = __builtin_spirv_OpenCL_fma_f64_f64_f64 (T.f, poly, T.f);
266+
sc.w = 0x5ff0000000000000UL;
267+
res.f *= sc.f;
268+
269+
if (res.w == 0x7ff0000000000000UL)
270+
nRet = 3;
271+
272+
*pres = res.f;
273+
274+
return nRet;
275+
}
276+
277+
/*
 * Double-precision expm1 entry point.
 *
 * Forwards `a` to __internal_dexpm1_la_cout() and returns the value that
 * kernel stores. The kernel's integer status code is not used.
 */
double __ocl_svml_expm1 (double a)
{
    double va1 = a;
    double vr1;

    /* The kernel writes vr1 on every return path. */
    __internal_dexpm1_la_cout (&va1, &vr1);

    return vr1;
}

IGC/BiFModule/Implementation/Math/expm1.cl

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,9 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2929

3030
#include "../ExternalLibraries/libclc/expm1.cl"
3131

32-
#if defined(cl_khr_fp64)
33-
#include "../ExternalLibraries/libclc/doubles.cl"
34-
#endif // defined(cl_khr_fp64)
35-
36-
32+
#if defined(cl_khr_fp64)
33+
#include "../IMF/FP64/expm1_d_la.cl"
34+
#endif // defined(cl_khr_fp64)
3735

3836
INLINE float __builtin_spirv_OpenCL_expm1_f32( float x )
3937
{
@@ -53,7 +51,7 @@ GENERATE_VECTOR_FUNCTIONS_1ARG_LOOP( __builtin_spirv_OpenCL_expm1, float, float,
5351

5452
INLINE double __builtin_spirv_OpenCL_expm1_f64( double x )
5553
{
56-
return libclc_expm1_f64(x);
54+
return __ocl_svml_expm1(x);
5755
}
5856

5957
GENERATE_VECTOR_FUNCTIONS_1ARG_LOOP( __builtin_spirv_OpenCL_expm1, double, double, f64 )

0 commit comments

Comments
 (0)