/*===================== begin_copyright_notice ==================================

Copyright (c) 2017 Intel Corporation

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


======================= end_copyright_notice ==================================*/
| 26 | + |
| 27 | +#include "../imf.h" |
| 28 | +#pragma OPENCL FP_CONTRACT OFF |
/*
 * Layout of the constant block for an alternate cospi coefficient set.
 *
 * Each field stores the raw 64-bit pattern of a double, to be reinterpreted
 * bit-for-bit rather than converted numerically.
 * NOTE(review): relies on `unsigned long` being 64 bits wide, as the 16-digit
 * hex initialisers in this file require -- confirm for any non-OpenCL build.
 * Nothing in the visible part of this file reads an object of this type.
 */
typedef struct
{
    unsigned long _dAbsMask;    /* all bits except the sign bit */
    unsigned long _dHalf;       /* bit pattern of 0.5 in the table below */
    unsigned long _dSignBit;    /* the sign bit alone */
    unsigned long _dC1_h;       /* polynomial coefficients, lowest order first */
    unsigned long _dC2;
    unsigned long _dC3;
    unsigned long _dC4;
    unsigned long _dC5;
    unsigned long _dC6;
    unsigned long _dC7;
    unsigned long _dC8;
    unsigned long _dC9;
} __internal_dcospi_la_data_avx512_t;
| 44 | +static __constant __internal_dcospi_la_data_avx512_t __internal_dcospi_la_data_avx512 = { |
| 45 | + 0x7FFFFFFFFFFFFFFFuL, |
| 46 | + 0x3fe0000000000000uL, |
| 47 | + 0x8000000000000000uL, |
| 48 | + 0xc00921fb54442d18uL, |
| 49 | + 0x4014abbce625be52uL, |
| 50 | + 0xc00466bc6775aa7duL, |
| 51 | + 0x3fe32d2cce627c9euL, |
| 52 | + 0xbfb50783485523f4uL, |
| 53 | + 0x3f7e3074dfb5bb14uL, |
| 54 | + 0xbf3e8f3677c334d3uL, |
| 55 | + 0x3ef6f7ad23b5cd51uL, |
| 56 | + 0xbea9d46b06ce620euL, |
| 57 | +}; |
| 58 | + |
/*
 * Layout of the constant block consumed by __ocl_svml_cospi.
 *
 * Each field stores the raw 64-bit pattern of a double; the kernel turns it
 * back into a double with as_double().
 * NOTE(review): relies on `unsigned long` being 64 bits wide, as the 16-digit
 * hex initialisers in this file require -- confirm for any non-OpenCL build.
 */
typedef struct
{
    unsigned long _dAbsMask;            /* mask that strips the sign bit */
    unsigned long _dReductionRangeVal;  /* |x| above this takes the large-argument path */
    unsigned long _dRangeVal;           /* exponent-field mask, used to spot Inf/NaN */
    unsigned long _dRShifter;           /* added then subtracted to round to an integer */
    unsigned long _dOneHalf;            /* 0.5 */

    unsigned long _dC0;                 /* coefficient of the linear term */

    unsigned long _dC1;                 /* higher odd-power coefficients, low to high */
    unsigned long _dC2;
    unsigned long _dC3;
    unsigned long _dC4;

    unsigned long _dC5;
    unsigned long _dC6;
    unsigned long _dC7;
    unsigned long _dC8;
    unsigned long _dThres;              /* not initialised by the table and not read in the visible code */
    unsigned long _dSgnExp;             /* not initialised by the table and not read in the visible code */

} __internal_dcospi_la_data_t;
| 82 | +static __constant __internal_dcospi_la_data_t __internal_dcospi_la_data = { |
| 83 | + 0x7FFFFFFFFFFFFFFFuL, |
| 84 | + 0x42A0000000000000uL, |
| 85 | + 0x7FF0000000000000uL, |
| 86 | + 0x4338000000000000uL, |
| 87 | + 0x3FE0000000000000uL, |
| 88 | + |
| 89 | + 0x400921fb54442d18uL, |
| 90 | + 0xc014abbce625be52uL, |
| 91 | + 0x400466bc6775aa7duL, |
| 92 | + 0xbfe32d2cce627c9euL, |
| 93 | + 0x3fb50783485523f4uL, |
| 94 | + 0xbf7e3074dfb5bb14uL, |
| 95 | + 0x3f3e8f3677c334d3uL, |
| 96 | + 0xbef6f7ad23b5cd51uL, |
| 97 | + 0x3ea9d46b06ce620euL, |
| 98 | + |
| 99 | +}; |
| 100 | + |
| 101 | +static __constant _iml_v2_dp_union_t __dcospi_la_CoutTab[3] = { |
| 102 | + 0x00000000, 0x00000000, |
| 103 | + 0x00000000, 0x7FF00000, |
| 104 | +}; |
| 105 | + |
| 106 | +#pragma float_control(push) |
| 107 | +#pragma float_control(precise,on) |
| 108 | +__attribute__((always_inline)) |
| 109 | +inline int __internal_dcospi_la_cout (double *a, double *r) |
| 110 | +{ |
| 111 | + double absx; |
| 112 | + int nRet = 0; |
| 113 | + |
| 114 | + absx = (*a); |
| 115 | + (((_iml_v2_dp_union_t *) & absx)->dwords.hi_dword = (((_iml_v2_dp_union_t *) & absx)->dwords.hi_dword & 0x7FFFFFFF) | ((_iml_uint32_t) (0) << 31)); |
| 116 | + if (!(((((_iml_v2_dp_union_t *) & (*a))->dwords.hi_dword >> 20) & 0x7FF) != 0x7FF)) |
| 117 | + { |
| 118 | + |
| 119 | + if ((((_iml_v2_dp_union_t *) & (absx))->hex[0] == ((__constant _iml_v2_dp_union_t *) & (((__constant double *) __dcospi_la_CoutTab)[1]))->hex[0]) |
| 120 | + && (((_iml_v2_dp_union_t *) & (absx))->hex[1] == |
| 121 | + ((__constant _iml_v2_dp_union_t *) & (((__constant double *) __dcospi_la_CoutTab)[1]))->hex[1])) |
| 122 | + { |
| 123 | + |
| 124 | + (*r) = (*a) * ((__constant double *) __dcospi_la_CoutTab)[0]; |
| 125 | + |
| 126 | + nRet = 1; |
| 127 | + } |
| 128 | + else |
| 129 | + { |
| 130 | + |
| 131 | + (*r) = ((*a) + (*a)); |
| 132 | + } |
| 133 | + } |
| 134 | + return nRet; |
| 135 | +} |
| 136 | + |
| 137 | +#pragma float_control(pop) |
| 138 | +double __ocl_svml_cospi (double a) |
| 139 | +{ |
| 140 | + |
| 141 | + double va1; |
| 142 | + double vr1; |
| 143 | + unsigned int vm; |
| 144 | + |
| 145 | + double r; |
| 146 | + |
| 147 | + va1 = a;; |
| 148 | + |
| 149 | + { |
| 150 | + |
| 151 | + double dX; |
| 152 | + double dAbsX; |
| 153 | + double dExp; |
| 154 | + double dRangeMask; |
| 155 | + unsigned long lRangeMask; |
| 156 | + double dReductionRangeMask; |
| 157 | + unsigned long lReductionRangeMask; |
| 158 | + unsigned int mReductionRangeMask; |
| 159 | + double dSign; |
| 160 | + double dSignReduced; |
| 161 | + double dSignRes; |
| 162 | + double dN; |
| 163 | + double dY; |
| 164 | + double dR; |
| 165 | + double dRp2; |
| 166 | + double dPoly; |
| 167 | + |
| 168 | + double dAbsMask; |
| 169 | + double dRangeVal; |
| 170 | + double dReductionRangeVal; |
| 171 | + double dRShifter; |
| 172 | + double dInvPI; |
| 173 | + double dPI; |
| 174 | + double dHalfPI; |
| 175 | + double dOneHalf; |
| 176 | + double dC0; |
| 177 | + double dPiToRad; |
| 178 | + |
| 179 | + double dC1; |
| 180 | + double dC2; |
| 181 | + double dC3; |
| 182 | + double dC4; |
| 183 | + double dC5; |
| 184 | + double dC6; |
| 185 | + double dC7; |
| 186 | + double dC8; |
| 187 | + |
| 188 | + dX = va1; |
| 189 | + |
| 190 | + vm = 0; |
| 191 | + |
| 192 | + dAbsMask = as_double (__internal_dcospi_la_data._dAbsMask); |
| 193 | + dAbsX = as_double ((as_ulong (dX) & as_ulong (dAbsMask))); |
| 194 | + |
| 195 | + dR = dAbsX; |
| 196 | + |
| 197 | + dReductionRangeVal = as_double (__internal_dcospi_la_data._dReductionRangeVal); |
| 198 | + dReductionRangeMask = as_double ((unsigned long) (((!(dAbsX <= dReductionRangeVal)) ? 0xffffffffffffffff : 0x0))); |
| 199 | + lReductionRangeMask = as_ulong (dReductionRangeMask); |
| 200 | + mReductionRangeMask = 0; |
| 201 | + mReductionRangeMask = lReductionRangeMask; |
| 202 | + if ((mReductionRangeMask) != 0) |
| 203 | + { |
| 204 | + |
| 205 | + dRangeVal = as_double (__internal_dcospi_la_data._dRangeVal); |
| 206 | + dExp = as_double ((as_ulong (dRangeVal) & as_ulong (dAbsX))); |
| 207 | + dRangeMask = as_double ((unsigned long) ((dExp == dRangeVal) ? 0xffffffffffffffff : 0x0)); |
| 208 | + lRangeMask = as_ulong (dRangeMask); |
| 209 | + vm = 0; |
| 210 | + vm = lRangeMask; |
| 211 | + |
| 212 | + { |
| 213 | + |
| 214 | + double dX; |
| 215 | + double dShifterMod; |
| 216 | + double dShifterThreshold; |
| 217 | + double dShifterMask; |
| 218 | + double dShifterPos; |
| 219 | + double dShifter; |
| 220 | + double dDirect; |
| 221 | + double dInverse; |
| 222 | + double dShiftedN; |
| 223 | + double dN; |
| 224 | + double dZero; |
| 225 | + dX = dAbsX; |
| 226 | + dShifterThreshold = as_double (0x43A0000000000000uL); |
| 227 | + dShifterMask = as_double ((unsigned long) ((dX < dShifterThreshold) ? 0xffffffffffffffff : 0x0)); |
| 228 | + |
| 229 | + dShifterPos = as_double (0x43B8000000000000uL); |
| 230 | + dZero = as_double (0x0000000000000000uL); |
| 231 | + dShifter = as_double ((((~as_ulong (dShifterMask)) & as_ulong (dZero)) | (as_ulong (dShifterMask) & as_ulong (dShifterPos)))); |
| 232 | + |
| 233 | + dShiftedN = (dShifter + dAbsX); |
| 234 | + dN = (dShiftedN - dShifter); |
| 235 | + dR = (dAbsX - dN); |
| 236 | + |
| 237 | + } |
| 238 | + |
| 239 | + dR = as_double ((((~as_ulong (dReductionRangeMask)) & as_ulong (dAbsX)) | (as_ulong (dReductionRangeMask) & as_ulong (dR)))); |
| 240 | + } |
| 241 | + |
| 242 | + dOneHalf = as_double (__internal_dcospi_la_data._dOneHalf); |
| 243 | + dX = (dR + dOneHalf); |
| 244 | + |
| 245 | + dRShifter = as_double (__internal_dcospi_la_data._dRShifter); |
| 246 | + dY = (dX + dRShifter); |
| 247 | + dN = (dY - dRShifter); |
| 248 | + |
| 249 | + dSignRes = as_double (((unsigned long) as_ulong (dY) << (63))); |
| 250 | + |
| 251 | + dN = (dN - dOneHalf); |
| 252 | + dR = (dR - dN); |
| 253 | + |
| 254 | + dR = as_double ((as_ulong (dR) ^ as_ulong (dSignRes))); |
| 255 | + dRp2 = (dR * dR); |
| 256 | + |
| 257 | + dC8 = as_double (__internal_dcospi_la_data._dC8); |
| 258 | + dC7 = as_double (__internal_dcospi_la_data._dC7); |
| 259 | + dC6 = as_double (__internal_dcospi_la_data._dC6); |
| 260 | + dPoly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (dC8, dRp2, dC7); |
| 261 | + dPoly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (dPoly, dRp2, dC6); |
| 262 | + dC5 = as_double (__internal_dcospi_la_data._dC5); |
| 263 | + dPoly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (dPoly, dRp2, dC5); |
| 264 | + dC4 = as_double (__internal_dcospi_la_data._dC4); |
| 265 | + dPoly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (dPoly, dRp2, dC4); |
| 266 | + dC3 = as_double (__internal_dcospi_la_data._dC3); |
| 267 | + dPoly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (dPoly, dRp2, dC3); |
| 268 | + |
| 269 | + dC2 = as_double (__internal_dcospi_la_data._dC2); |
| 270 | + dPoly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (dPoly, dRp2, dC2); |
| 271 | + dC1 = as_double (__internal_dcospi_la_data._dC1); |
| 272 | + dPoly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (dPoly, dRp2, dC1); |
| 273 | + dC0 = as_double (__internal_dcospi_la_data._dC0); |
| 274 | + dC0 = (dC0 * dR); |
| 275 | + dPoly = (dPoly * dRp2); |
| 276 | + |
| 277 | + vr1 = __builtin_spirv_OpenCL_fma_f64_f64_f64 (dPoly, dR, dC0); |
| 278 | + } |
| 279 | + |
| 280 | + if ((vm) != 0) |
| 281 | + { |
| 282 | + double _vapi_arg1[1]; |
| 283 | + double _vapi_res1[1]; |
| 284 | + ((double *) _vapi_arg1)[0] = va1; |
| 285 | + ((double *) _vapi_res1)[0] = vr1; |
| 286 | + __internal_dcospi_la_cout (_vapi_arg1, _vapi_res1); |
| 287 | + vr1 = ((double *) _vapi_res1)[0]; |
| 288 | + }; |
| 289 | + r = vr1;; |
| 290 | + |
| 291 | + return r; |
| 292 | + |
| 293 | +} |
0 commit comments