Skip to content

Commit e1234c7

Browse files
committed
feat: add and fix cuda files
1 parent 886fe84 commit e1234c7

File tree

11 files changed

+222
-94
lines changed

11 files changed

+222
-94
lines changed

Cargo.toml

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,16 @@ candle-datasets = "0.8.1"
2020
candle-nn = "0.8.1"
2121
candle-transformers = "0.8.1"
2222
chrono = "0.4.38"
23-
cudarc = { version = "0.13.9", optional = true, features = ["cuda-12080"] }
23+
cudarc = { version = "0.13.9", optional = true, features = ["cuda-12080", "cuda-version-from-build-system"] }
2424
flate2 = "1.0.34"
2525
gauss-quad = "0.2.1"
2626
impl-new-derive = "0.1.2"
2727
implied-vol = "1.0.0"
2828
indicatif = "0.17.8"
29-
itransformer = "1.0.1"
29+
# itransformer = "1.0.1"
3030
kendalls = "0.2.2"
3131
levenberg-marquardt = "0.14.0"
32+
libloading = { version = "0.8.6", optional = true }
3233
linreg = "0.2.0"
3334
mimalloc = { version = "0.1.43", optional = true }
3435
nalgebra = "0.33.2"
@@ -37,7 +38,7 @@ ndarray = { version = "0.16.1", features = [
3738
"matrixmultiply-threading",
3839
"blas",
3940
] }
40-
ndarray-linalg = { version = "0.17.0", features = ["openblas-static"] }
41+
ndarray-linalg = { version = "0.17.0"}
4142
ndarray-npy = "0.9.1"
4243
ndarray-rand = "0.15.0"
4344
ndarray-stats = "0.6.0"
@@ -68,8 +69,8 @@ yahoo_finance_api = { version = "2.3.0", optional = true }
6869
[dev-dependencies]
6970

7071
[features]
71-
cuda = ["dep:cudarc"]
72-
default = ["jemalloc"]
72+
cuda = ["dep:cudarc", "dep:libloading"]
73+
default = ["cuda"]
7374
jemalloc = ["dep:tikv-jemallocator"]
7475
malliavin = []
7576
mimalloc = ["dep:mimalloc"]
@@ -86,5 +87,17 @@ debug = false
8687
codegen-units = 1
8788
lto = true
8889

90+
[target.'cfg(target_os = "macos")'.dependencies]
91+
ndarray-linalg = { version = "0.17.0", features = ["openblas-static"] }
92+
93+
[target.'cfg(target_os = "macos")'.features]
94+
default = ["jemalloc"]
95+
96+
[target.'cfg(target_os = "linux")'.features]
97+
default = ["jemalloc"]
98+
99+
[target.'cfg(target_os = "windows")'.features]
100+
default = ["mimalloc"]
101+
89102
# [package.metadata.docs.rs]
90103
# all-features = true

src/ai.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use candle_core::Tensor;
2-
pub use itransformer::ITransformer;
2+
// pub use itransformer::ITransformer;
33

44
pub mod fou;
55
pub mod utils;

src/stochastic/cuda/fgn.cu

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
#include <stdio.h>
2+
#include <cuda_runtime.h>
3+
#include <curand_kernel.h>
4+
#include <cufft.h>
5+
#include <cuComplex.h>
6+
#include <math.h>
7+
8+
#ifdef _WIN32
9+
#define EXPORT __declspec(dllexport)
10+
#else
11+
#define EXPORT
12+
#endif
13+
14+
// Upper bound on the threads per block used by the launches in fgn_kernel.
// Both kernels walk their row with a block-stride loop, so any block size
// from 1 up to the device limit produces the same output.
static const int FGN_THREADS_PER_BLOCK = 256;

// Returns true when `status` is cudaSuccess; otherwise reports the failing
// step on stderr and returns false.
static bool fgn_cuda_ok(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "fgn_kernel: %s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Returns true when `status` is CUFFT_SUCCESS; otherwise reports the failing
// step on stderr and returns false.
static bool fgn_cufft_ok(cufftResult status, const char *what)
{
    if (status == CUFFT_SUCCESS)
        return true;
    fprintf(stderr, "fgn_kernel: %s failed with cuFFT status %d\n", what, (int)status);
    return false;
}

// Fills d_data with complex standard-normal noise multiplied element-wise by
// d_sqrt_eigs.
//
// Launch layout: one block per trajectory (blockIdx.x in [0, m)); the threads
// of a block stride over the traj_size elements of that trajectory, so the
// block size does not have to equal traj_size.
//
// d_data      : device buffer of at least m * traj_size elements (written).
// d_sqrt_eigs : device buffer of at least traj_size elements (read); the same
//               values are applied to every trajectory.
// seed        : base RNG seed; element `idx` of trajectory `traj_id` draws from
//               curand_init(seed + traj_id, idx, 0), independent of which
//               thread processes it.
__global__ void fill_random_with_eigs(
    cuComplex *d_data,
    const cuComplex *d_sqrt_eigs,
    int traj_size,
    int m,
    unsigned long seed)
{
    int traj_id = blockIdx.x;
    if (traj_id >= m)
        return;

    // 64-bit row offset: traj_id * traj_size can exceed INT_MAX.
    size_t row_base = (size_t)traj_id * (size_t)traj_size;

    for (int idx = threadIdx.x; idx < traj_size; idx += blockDim.x)
    {
        curandState state;
        curand_init(seed + traj_id, idx, 0, &state);

        float re = curand_normal(&state);
        float im = curand_normal(&state);
        cuComplex noise = make_cuComplex(re, im);

        d_data[row_base + idx] = cuCmulf(noise, d_sqrt_eigs[idx]);
    }
}

// Copies the real part of entries 1 .. (n - offset) of every length-2n row of
// d_data into d_output, multiplied by n^(-hurst) * t^hurst.
//
// Launch layout: one block per trajectory (blockIdx.x in [0, m)); the threads
// of a block stride over the (n - offset) outputs of that trajectory.
//
// d_data   : device buffer of m rows of 2 * n complex values (read).
// d_output : device buffer of at least m * (n - offset) floats (written).
// Precondition: 0 <= offset <= n, so every read stays inside its row.
__global__ void scale_and_copy_to_output(
    const cuComplex *d_data,
    float *d_output,
    int n,
    int m,
    int offset,
    float hurst,
    float t)
{
    int traj_id = blockIdx.x;
    if (traj_id >= m)
        return;

    int out_size = n - offset;
    if (out_size <= 0)
        return;

    // The scale factor is identical for every element; compute it once.
    float scale = powf((float)n, -hurst) * powf(t, hurst);

    // 64-bit row offsets: traj_id * (2 * n) can exceed INT_MAX.
    size_t in_base = (size_t)traj_id * (size_t)(2 * n);
    size_t out_base = (size_t)traj_id * (size_t)out_size;

    for (int idx = threadIdx.x; idx < out_size; idx += blockDim.x)
    {
        // Entry 0 of each row is skipped: output idx reads input idx + 1.
        d_output[out_base + idx] = d_data[in_base + idx + 1].x * scale;
    }
}

// Host entry point: generates m rows of (n - offset) floats into d_output.
//
// Steps: (1) allocate a temporary m x 2n complex buffer, (2) fill it with
// random noise scaled by d_sqrt_eigs, (3) run a batched in-place forward
// complex-to-complex FFT of length 2n over the m rows, (4) write the scaled
// real parts to d_output, (5) synchronize and release the temporaries.
//
// d_sqrt_eigs : device pointer, at least 2 * n complex values.
// d_output    : device pointer, at least m * (n - offset) floats.
// n, m        : must be positive; offset must satisfy 0 <= offset <= n.
// hurst, t    : used only for the output scale n^(-hurst) * t^hurst.
// seed        : base seed for the device RNG.
//
// The function returns void, so failures (bad arguments, CUDA or cuFFT errors)
// are reported on stderr; in that case d_output must be treated as invalid.
extern "C" EXPORT void fgn_kernel(
    const cuComplex *d_sqrt_eigs,
    float *d_output,
    int n,
    int m,
    int offset,
    float hurst,
    float t,
    unsigned long seed)
{
    // n is capped so that 2 * n still fits in an int.
    if (d_sqrt_eigs == nullptr || d_output == nullptr ||
        n <= 0 || n > 0x3FFFFFFF || m <= 0 || offset < 0 || offset > n)
    {
        fprintf(stderr, "fgn_kernel: invalid arguments (n=%d, m=%d, offset=%d)\n",
                n, m, offset);
        return;
    }

    int traj_size = 2 * n;
    int out_size = n - offset;

    cuComplex *d_data = nullptr;
    cufftHandle plan = 0;
    bool have_plan = false;

    bool ok = fgn_cuda_ok(
        cudaMalloc(&d_data, (size_t)m * (size_t)traj_size * sizeof(cuComplex)),
        "cudaMalloc");

    if (ok)
    {
        // Block size is capped: launching with traj_size threads fails once
        // traj_size exceeds the device's per-block thread limit.
        int threads = traj_size < FGN_THREADS_PER_BLOCK ? traj_size : FGN_THREADS_PER_BLOCK;
        fill_random_with_eigs<<<dim3(m), dim3(threads)>>>(
            d_data, d_sqrt_eigs, traj_size, m, seed);
        ok = fgn_cuda_ok(cudaGetLastError(), "fill_random_with_eigs launch");
    }

    if (ok)
    {
        ok = fgn_cufft_ok(cufftPlan1d(&plan, traj_size, CUFFT_C2C, m), "cufftPlan1d");
        have_plan = ok;
    }

    if (ok)
    {
        ok = fgn_cufft_ok(cufftExecC2C(plan, d_data, d_data, CUFFT_FORWARD), "cufftExecC2C");
    }

    // out_size == 0 means there is nothing to write; a zero-thread launch
    // would be an invalid configuration, so skip it.
    if (ok && out_size > 0)
    {
        int threads = out_size < FGN_THREADS_PER_BLOCK ? out_size : FGN_THREADS_PER_BLOCK;
        scale_and_copy_to_output<<<dim3(m), dim3(threads)>>>(
            d_data, d_output, n, m, offset, hurst, t);
        ok = fgn_cuda_ok(cudaGetLastError(), "scale_and_copy_to_output launch");
    }

    // One synchronization before releasing resources: it waits for all queued
    // work and surfaces any asynchronous execution error.
    fgn_cuda_ok(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

    if (have_plan)
        cufftDestroy(plan);
    cudaFree(d_data);
}
827 Bytes
Binary file not shown.
1.84 KB
Binary file not shown.
837 KB
Binary file not shown.
837 KB
Binary file not shown.
822 Bytes
Binary file not shown.
1.8 KB
Binary file not shown.

src/stochastic/noise/fgn.cu

Lines changed: 0 additions & 54 deletions
This file was deleted.

0 commit comments

Comments
 (0)