Skip to content

Commit 79fb41f

Browse files
committed
chore: reorganize correlation
1 parent baf893a commit 79fb41f

File tree

3 files changed

+164
-188
lines changed

3 files changed

+164
-188
lines changed

src/stats/copula.rs

Lines changed: 0 additions & 188 deletions
Original file line numberDiff line numberDiff line change
@@ -48,68 +48,6 @@ pub fn cdf_gumbel(u: f64, v: f64, theta: f64) -> f64 {
4848
((-1.0) * s.powf(1.0 / theta)).exp()
4949
}
5050

51-
/// Empirical copula (2D) - rank-based transformation
52-
#[derive(Clone, Debug)]
53-
pub struct EmpiricalCopula2D {
54-
/// The rank-transformed data (N x 2), each row in [0,1]^2
55-
pub rank_data: Array2<f64>,
56-
}
57-
58-
impl EmpiricalCopula2D {
59-
/// Create an EmpiricalCopula2D from two 1D arrays (`x` and `y`) of equal length.
60-
/// This performs a rank-based transform: for each sample i,
61-
/// sx[i] = rank_of_x[i] / n
62-
/// sy[i] = rank_of_y[i] / n
63-
/// and stores the resulting points in [0,1]^2.
64-
pub fn new_from_two_series(x: &Array1<f64>, y: &Array1<f64>) -> Self {
65-
assert_eq!(x.len(), y.len(), "x and y must have the same length!");
66-
let n = x.len();
67-
68-
// Convert to Vec for easier sorting with indices
69-
let mut xv: Vec<(f64, usize)> = x.iter().enumerate().map(|(i, &val)| (val, i)).collect();
70-
let mut yv: Vec<(f64, usize)> = y.iter().enumerate().map(|(i, &val)| (val, i)).collect();
71-
72-
// Sort by the actual float value
73-
xv.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
74-
yv.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
75-
76-
// After sorting, xv[k] = (value, original_index).
77-
// The rank of that original index is k.
78-
let mut rank_x = vec![0.0; n];
79-
let mut rank_y = vec![0.0; n];
80-
for (rank, &(_val, orig_i)) in xv.iter().enumerate() {
81-
rank_x[orig_i] = rank as f64; // rank in [0..n-1]
82-
}
83-
for (rank, &(_val, orig_i)) in yv.iter().enumerate() {
84-
rank_y[orig_i] = rank as f64;
85-
}
86-
87-
// Normalize ranks to [0,1].
88-
for i in 0..n {
89-
rank_x[i] /= n as f64;
90-
rank_y[i] /= n as f64;
91-
}
92-
93-
// Build final (n x 2) array
94-
let mut rank_data = Array2::<f64>::zeros((n, 2));
95-
for i in 0..n {
96-
rank_data[[i, 0]] = rank_x[i];
97-
rank_data[[i, 1]] = rank_y[i];
98-
}
99-
EmpiricalCopula2D { rank_data }
100-
}
101-
}
102-
103-
impl NCopula2D for EmpiricalCopula2D {
104-
fn sample(&self, _n: usize) -> Array2<f64> {
105-
self.rank_data.clone()
106-
}
107-
108-
fn get_params(&self) -> Vec<f64> {
109-
vec![]
110-
}
111-
}
112-
11351
/// Gaussian copula (2D)
11452
#[derive(Clone, Debug)]
11553
pub struct GaussianCopula2D {
@@ -245,109 +183,13 @@ impl NCopula2D for ClaytonCopula2D {
245183
}
246184
}
247185

248-
/// Kendall's tau matrix for a given data matrix
249-
pub fn kendall_tau(data: &Array2<f64>) -> Array2<f64> {
250-
let cols = data.ncols();
251-
let mut tau_matrix = Array2::<f64>::zeros((cols, cols));
252-
253-
for i in 0..cols {
254-
for j in i..cols {
255-
let col_i = data.column(i);
256-
let col_j = data.column(j);
257-
let mut concordant = 0;
258-
let mut discordant = 0;
259-
260-
for k in 0..col_i.len() {
261-
for l in (k + 1)..col_i.len() {
262-
let x_diff = col_i[k] - col_i[l];
263-
let y_diff = col_j[k] - col_j[l];
264-
let sign = x_diff * y_diff;
265-
266-
if sign > 0.0 {
267-
concordant += 1;
268-
} else if sign < 0.0 {
269-
discordant += 1;
270-
}
271-
}
272-
}
273-
274-
let total_pairs = (col_i.len() * (col_i.len() - 1)) / 2;
275-
let tau = (concordant as f64 - discordant as f64) / total_pairs as f64;
276-
tau_matrix[[i, j]] = tau;
277-
tau_matrix[[j, i]] = tau;
278-
}
279-
}
280-
281-
tau_matrix
282-
}
283-
284-
fn spearman_correlation(data: &Array2<f64>) -> Array2<f64> {
285-
let cols = data.ncols();
286-
let mut rho_matrix = Array2::<f64>::zeros((cols, cols));
287-
288-
for i in 0..cols {
289-
for j in i..cols {
290-
let col_i = data.column(i);
291-
let col_j = data.column(j);
292-
293-
let mean_i = col_i.sum() / col_i.len() as f64;
294-
let mean_j = col_j.sum() / col_j.len() as f64;
295-
296-
let numerator: f64 = col_i
297-
.iter()
298-
.zip(col_j.iter())
299-
.map(|(&xi, &yi)| (xi - mean_i) * (yi - mean_j))
300-
.sum();
301-
302-
let denominator_i = col_i
303-
.iter()
304-
.map(|&xi| (xi - mean_i).powi(2))
305-
.sum::<f64>()
306-
.sqrt();
307-
let denominator_j = col_j
308-
.iter()
309-
.map(|&yi| (yi - mean_j).powi(2))
310-
.sum::<f64>()
311-
.sqrt();
312-
313-
let rho = numerator / (denominator_i * denominator_j);
314-
rho_matrix[[i, j]] = rho;
315-
rho_matrix[[j, i]] = rho; // Szimmetrikus mátrix
316-
}
317-
}
318-
319-
rho_matrix
320-
}
321-
322186
#[cfg(test)]
323187
mod tests {
324188
use super::*;
325189
use ndarray::arr2;
326-
use rand_distr::Uniform;
327190

328191
const N: usize = 10000;
329192

330-
#[test]
331-
fn test_empirical_copula() {
332-
let mut rng = thread_rng();
333-
let uniform = Uniform::new(0.0, 1.0);
334-
335-
let len_data = 500;
336-
let mut x = Array1::<f64>::zeros(len_data);
337-
let mut y = Array1::<f64>::zeros(len_data);
338-
for i in 0..len_data {
339-
let xv = uniform.sample(&mut rng);
340-
// Introduce some linear correlation
341-
let yv = 0.3 * uniform.sample(&mut rng) + 0.7 * xv;
342-
x[i] = xv;
343-
y[i] = yv.clamp(0.0, 1.0);
344-
}
345-
346-
let empirical = EmpiricalCopula2D::new_from_two_series(&x, &y);
347-
let emp_samples = empirical.sample(N);
348-
plot_copula_samples(&emp_samples, "Empirical Copula (2D) - Rank-based data");
349-
}
350-
351193
#[test]
352194
fn test_gaussian_copula() {
353195
let gauss = GaussianCopula2D {
@@ -379,34 +221,4 @@ mod tests {
379221
let c_clay = cdf_clayton(0.5, 0.8, 2.0);
380222
println!("Clayton(θ=2) CDF(0.5, 0.8) = {}", c_clay);
381223
}
382-
383-
#[test]
384-
fn test_kendall_tau() {
385-
let data = arr2(&[
386-
[1.0, 2.0, 3.0],
387-
[2.0, 3.0, 1.0],
388-
[3.0, 1.0, 2.0],
389-
[4.0, 4.0, 4.0],
390-
]);
391-
let x = data.column(0).to_owned();
392-
let y = data.column(1).to_owned();
393-
let copula = EmpiricalCopula2D::new_from_two_series(&x, &y);
394-
let tau_matrix = kendall_tau(&copula.rank_data);
395-
println!("Kendall's tau matrix:\n{:?}", tau_matrix);
396-
}
397-
398-
#[test]
399-
fn test_spearman_correlation() {
400-
let data = arr2(&[
401-
[1.0, 2.0, 3.0],
402-
[2.0, 3.0, 1.0],
403-
[3.0, 1.0, 2.0],
404-
[4.0, 4.0, 4.0],
405-
]);
406-
let x = data.column(0).to_owned();
407-
let y = data.column(1).to_owned();
408-
let copula = EmpiricalCopula2D::new_from_two_series(&x, &y);
409-
let rho_matrix = spearman_correlation(&copula.rank_data);
410-
println!("Spearman's rho matrix:\n{:?}", rho_matrix);
411-
}
412224
}

src/stats/copulas/correlation.rs

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
use ndarray::Array2;
2+
3+
/// Kendall's tau matrix for a given data matrix
4+
pub fn kendall_tau(data: &Array2<f64>) -> Array2<f64> {
5+
let cols = data.ncols();
6+
let mut tau_matrix = Array2::<f64>::zeros((cols, cols));
7+
8+
for i in 0..cols {
9+
for j in i..cols {
10+
let col_i = data.column(i);
11+
let col_j = data.column(j);
12+
let mut concordant = 0;
13+
let mut discordant = 0;
14+
15+
for k in 0..col_i.len() {
16+
for l in (k + 1)..col_i.len() {
17+
let x_diff = col_i[k] - col_i[l];
18+
let y_diff = col_j[k] - col_j[l];
19+
let sign = x_diff * y_diff;
20+
21+
if sign > 0.0 {
22+
concordant += 1;
23+
} else if sign < 0.0 {
24+
discordant += 1;
25+
}
26+
}
27+
}
28+
29+
let total_pairs = (col_i.len() * (col_i.len() - 1)) / 2;
30+
let tau = (concordant as f64 - discordant as f64) / total_pairs as f64;
31+
tau_matrix[[i, j]] = tau;
32+
tau_matrix[[j, i]] = tau;
33+
}
34+
}
35+
36+
tau_matrix
37+
}
38+
39+
/// Pearson correlation matrix of the columns of `data`.
///
/// NOTE(review): despite the name, this computes plain Pearson correlation
/// on the values exactly as given; it equals Spearman's rho only when the
/// input has already been rank-transformed (e.g. `EmpiricalCopula2D`'s
/// `rank_data`). Callers passing raw observations get Pearson, not Spearman.
///
/// Returns a symmetric `cols x cols` matrix. A zero-variance column makes
/// its denominator 0, so its row/column entries (including the diagonal)
/// come out as NaN (0/0); callers should screen constant columns if that
/// matters.
pub fn spearman_correlation(data: &Array2<f64>) -> Array2<f64> {
    let cols = data.ncols();
    let mut rho_matrix = Array2::<f64>::zeros((cols, cols));

    for i in 0..cols {
        for j in i..cols {
            let col_i = data.column(i);
            let col_j = data.column(j);

            let mean_i = col_i.sum() / col_i.len() as f64;
            let mean_j = col_j.sum() / col_j.len() as f64;

            // Centered cross-product (covariance numerator).
            let numerator: f64 = col_i
                .iter()
                .zip(col_j.iter())
                .map(|(&xi, &yi)| (xi - mean_i) * (yi - mean_j))
                .sum();

            // Per-column centered L2 norms.
            let denominator_i = col_i
                .iter()
                .map(|&xi| (xi - mean_i).powi(2))
                .sum::<f64>()
                .sqrt();
            let denominator_j = col_j
                .iter()
                .map(|&yi| (yi - mean_j).powi(2))
                .sum::<f64>()
                .sqrt();

            let rho = numerator / (denominator_i * denominator_j);
            rho_matrix[[i, j]] = rho;
            rho_matrix[[j, i]] = rho; // symmetric matrix
        }
    }

    rho_matrix
}

src/stats/copulas/empirical.rs

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
use ndarray::{Array1, Array2};
2+
3+
/// Empirical copula (2D) - rank-based transformation
4+
#[derive(Clone, Debug)]
5+
pub struct EmpiricalCopula2D {
6+
/// The rank-transformed data (N x 2), each row in [0,1]^2
7+
pub rank_data: Array2<f64>,
8+
}
9+
10+
impl EmpiricalCopula2D {
11+
/// Create an EmpiricalCopula2D from two 1D arrays (`x` and `y`) of equal length.
12+
/// This performs a rank-based transform: for each sample i,
13+
/// sx[i] = rank_of_x[i] / n
14+
/// sy[i] = rank_of_y[i] / n
15+
/// and stores the resulting points in [0,1]^2.
16+
pub fn new_from_two_series(x: &Array1<f64>, y: &Array1<f64>) -> Self {
17+
assert_eq!(x.len(), y.len(), "x and y must have the same length!");
18+
let n = x.len();
19+
20+
// Convert to Vec for easier sorting with indices
21+
let mut xv: Vec<(f64, usize)> = x.iter().enumerate().map(|(i, &val)| (val, i)).collect();
22+
let mut yv: Vec<(f64, usize)> = y.iter().enumerate().map(|(i, &val)| (val, i)).collect();
23+
24+
// Sort by the actual float value
25+
xv.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
26+
yv.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
27+
28+
// After sorting, xv[k] = (value, original_index).
29+
// The rank of that original index is k.
30+
let mut rank_x = vec![0.0; n];
31+
let mut rank_y = vec![0.0; n];
32+
for (rank, &(_val, orig_i)) in xv.iter().enumerate() {
33+
rank_x[orig_i] = rank as f64; // rank in [0..n-1]
34+
}
35+
for (rank, &(_val, orig_i)) in yv.iter().enumerate() {
36+
rank_y[orig_i] = rank as f64;
37+
}
38+
39+
// Normalize ranks to [0,1].
40+
for i in 0..n {
41+
rank_x[i] /= n as f64;
42+
rank_y[i] /= n as f64;
43+
}
44+
45+
// Build final (n x 2) array
46+
let mut rank_data = Array2::<f64>::zeros((n, 2));
47+
for i in 0..n {
48+
rank_data[[i, 0]] = rank_x[i];
49+
rank_data[[i, 1]] = rank_y[i];
50+
}
51+
EmpiricalCopula2D { rank_data }
52+
}
53+
54+
fn sample(&self, _n: usize) -> Array2<f64> {
55+
self.rank_data.clone()
56+
}
57+
}
58+
59+
#[cfg(test)]
mod tests {
    use ndarray::Array1;
    use rand::thread_rng;
    use rand_distr::{Distribution, Uniform};

    use crate::{stats::copula::plot_copula_samples, stochastic::N};

    use super::EmpiricalCopula2D;

    /// Smoke test: rank-transform two linearly dependent uniform series and
    /// hand the resulting copula samples to the plotting helper.
    #[test]
    fn test_empirical_copula() {
        let mut rng = thread_rng();
        let uniform = Uniform::new(0.0, 1.0);

        let len_data = 500;
        let mut x = Array1::<f64>::zeros(len_data);
        let mut y = Array1::<f64>::zeros(len_data);
        for i in 0..len_data {
            let u = uniform.sample(&mut rng);
            // Mix 70% of x into y to induce some linear correlation.
            let w = 0.3 * uniform.sample(&mut rng) + 0.7 * u;
            x[i] = u;
            y[i] = w.clamp(0.0, 1.0);
        }

        let empirical = EmpiricalCopula2D::new_from_two_series(&x, &y);
        let emp_samples = empirical.sample(N);
        plot_copula_samples(&emp_samples, "Empirical Copula (2D) - Rank-based data");
    }
}

0 commit comments

Comments
 (0)