diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..ddff440 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +rustflags = ["-C", "target-cpu=native"] diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 1cd0722..b245e70 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -17,12 +17,12 @@ jobs: steps: - uses: actions/checkout@v4 # rust-version from Cargo.toml - - name: Install Rust 1.65.0 - uses: dtolnay/rust-toolchain@1.65.0 + - name: Install Rust 1.82.0 + uses: dtolnay/rust-toolchain@1.82.0 - name: Use predefined lockfile run: mv Cargo.lock.MSRV Cargo.lock - name: Build - run: cargo check --locked + run: cargo check build: runs-on: ubuntu-latest diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..d9cf66e --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "rust-analyzer.cargo.features": [ + "libblur", + "rayon" + ] +} \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 37f1a26..bf0aab3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ssimulacra2" -version = "0.5.1" +version = "0.6.0" edition = "2021" description = "Rust implementation of the SSIMULACRA2 metric" repository = "https://github.com/rust-av/ssimulacra2" @@ -10,16 +10,17 @@ exclude = ["test_data"] license = "BSD-2-Clause" # When changing MSRV: Also update README and .github/workflows/rust.yml -rust-version = "1.65.0" +rust-version = "1.82.0" [features] default = ["rayon"] [dependencies] -num-traits = "0.2.15" -rayon = { version = "1.5.3", optional = true } -thiserror = "2.0.9" -yuvxyb = "0.4.1" +libblur = { version = "0.17", optional = true } +num-traits = "0.2.19" +rayon = { version = "1.10", optional = true } +thiserror = "2.0.12" +yuvxyb = "0.4.2" [build-dependencies.yuvxyb-math] version = "0.1" @@ -27,7 +28,7 @@ version = "0.1" [dev-dependencies] criterion = "0.5.0" image = "0.25.0" -rand = "0.8.5" +rand = "0.9.1" [[bench]] name = "benches" diff --git a/benches/benches.rs b/benches/benches.rs index a9d4316..44d4b85 100644 --- a/benches/benches.rs +++ b/benches/benches.rs @@ -2,9 +2,10 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use num_traits::clamp; use rand::Rng; use ssimulacra2::{ - compute_frame_ssimulacra2, Blur, ColorPrimaries, Frame, MatrixCoefficients, Plane, - TransferCharacteristic, Yuv, YuvConfig, + compute_frame_ssimulacra2, xyb_to_planar, Blur, BlurOperator, ColorPrimaries, Frame, + MatrixCoefficients, Plane, TransferCharacteristic, Yuv, YuvConfig, }; +use yuvxyb::{LinearRgb, Xyb}; fn make_yuv( ss: (u8, u8), @@ -36,10 +37,10 @@ fn make_yuv( ), ], }; - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); for (i, plane) in data.planes.iter_mut().enumerate() { for val in plane.data_origin_mut().iter_mut() { - *val = rng.gen_range(if full_range { + *val = rng.random_range(if full_range { 0..=255 } else if i == 0 { 16..=235 @@ -64,7 +65,7 @@ fn make_yuv( } fn distort_yuv(input: &Yuv) -> Yuv { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let mut planes = [ input.data()[0].clone(), input.data()[1].clone(), @@ -72,7 +73,7 @@ fn distort_yuv(input: &Yuv) -> Yuv { ]; for plane in &mut planes { for pix in plane.data_origin_mut() { - *pix = clamp(i16::from(*pix) + rng.gen_range(-16..=16), 0, 255) as u8; + *pix = clamp(i16::from(*pix) + rng.random_range(-16..=16), 0, 255) as u8; } } let data: Frame = Frame { planes }; @@ -121,10 +122,169 @@ fn bench_blur(c: &mut Criterion) { // Blur the image let mut blur = Blur::new(width, height); + let mut out = std::array::from_fn(|_| vec![0.0f32; width * height]); - b.iter(|| blur.blur(black_box(&image))) + b.iter(|| blur.blur(black_box(&image), black_box(&mut out))); }); } -criterion_group!(benches, bench_ssimulacra2, bench_blur); +fn create_linearrgb_from_image(path: &str) -> LinearRgb { + let img = image::open(path).unwrap(); + let img = img.to_rgb8(); + let (width, height) = img.dimensions(); + + let mut data = vec![[0.0f32; 3]; (width * height) as usize]; + for (i, pixel) in img.pixels().enumerate() { + data[i] = [ + pixel[0] as f32 / 255.0, + pixel[1] as f32 / 255.0, + pixel[2] as f32 / 255.0, + ]; + } + + LinearRgb::new(data, width as usize, height as usize).expect("이미지 변환 실패") +} + +// downscale_by_2 +fn bench_downscale_by_2(c: &mut Criterion) { + // let mut group = c.benchmark_group("downscale"); + // group.measurement_time(std::time::Duration::from_secs_f32(9.2)); //Unable to complete 100 samples in 9.0s, so increased to 9.2s + + c.bench_function("downscale_by_2", |b| { + // load the image + let image = create_linearrgb_from_image("test_data/tank_source.png"); + + b.iter(|| ssimulacra2::downscale_by_2(black_box(&image))) + }); + // c.finish(); +} + +fn bench_image_multiply(c: &mut Criterion) { + c.bench_function("image_multiply", |b| { + let image = create_linearrgb_from_image("test_data/tank_source.png"); + let width = image.width(); + let height = image.height(); + + let img1 = xyb_to_planar(&Xyb::from(image.clone())); + let img2 = img1.clone(); + + let mut out = [ + vec![0.0f32; width * height], + vec![0.0f32; width * height], + vec![0.0f32; width * height], + ]; + + b.iter(|| { + ssimulacra2::image_multiply(black_box(&img1), black_box(&img2), black_box(&mut out)) + }) + }); +} + +fn bench_ssim_map(c: &mut Criterion) { + let mut group = c.benchmark_group("ssim_map"); + group.measurement_time(std::time::Duration::from_secs(9)); + + group.bench_function("standard", |b| { + let source_image = create_linearrgb_from_image("test_data/tank_source.png"); + let distorted_image = create_linearrgb_from_image("test_data/tank_distorted.png"); + let width = source_image.width(); + let height = source_image.height(); + + let mut img1_xyb = Xyb::from(source_image); + let mut img2_xyb = Xyb::from(distorted_image); + + for pix in img1_xyb.data_mut().iter_mut() { + pix[2] = (pix[2] - pix[1]) + 0.55; + pix[0] = (pix[0]).mul_add(14.0, 0.42); + pix[1] += 0.01; + } + for pix in img2_xyb.data_mut().iter_mut() { + pix[2] = (pix[2] - pix[1]) + 0.55; + pix[0] = (pix[0]).mul_add(14.0, 0.42); + pix[1] += 0.01; + } + + let img1_planar = xyb_to_planar(&img1_xyb); + let img2_planar = xyb_to_planar(&img2_xyb); + + let mut blur = Blur::new(width, height); + + let mut mul = [ + vec![0.0f32; width * height], + vec![0.0f32; width * height], + vec![0.0f32; width * height], + ]; + let mut sigma1_sq = [ + vec![0.0f32; width * height], + vec![0.0f32; width * height], + vec![0.0f32; width * height], + ]; + let mut sigma2_sq = [ + vec![0.0f32; width * height], + vec![0.0f32; width * height], + vec![0.0f32; width * height], + ]; + let mut sigma12 = [ + vec![0.0f32; width * height], + vec![0.0f32; width * height], + vec![0.0f32; width * height], + ]; + let mut mu1 = [ + vec![0.0f32; width * height], + vec![0.0f32; width * height], + vec![0.0f32; width * height], + ]; + let mut mu2 = [ + vec![0.0f32; width * height], + vec![0.0f32; width * height], + vec![0.0f32; width * height], + ]; + + ssimulacra2::image_multiply(&img1_planar, &img1_planar, &mut mul); + blur.blur(&mul, &mut sigma1_sq).expect("blur failed"); + + ssimulacra2::image_multiply(&img2_planar, &img2_planar, &mut mul); + blur.blur(&mul, &mut sigma2_sq).expect("blur failed"); + + ssimulacra2::image_multiply(&img1_planar, &img2_planar, &mut mul); + blur.blur(&mul, &mut sigma12).expect("blur failed"); + + blur.blur(&img1_planar, &mut mu1).expect("blur failed"); + blur.blur(&img2_planar, &mut mu2).expect("blur failed"); + + b.iter(|| { + ssimulacra2::ssim_map( + black_box(width), + black_box(height), + black_box(&mu1), + black_box(&mu2), + black_box(&sigma1_sq), + black_box(&sigma2_sq), + black_box(&sigma12), + ) + }); + }); + + group.finish(); +} + +fn bench_xyb_to_planar(c: &mut Criterion) { + c.bench_function("xyb_to_planar", |b| { + let image = create_linearrgb_from_image("test_data/tank_source.png"); + let xyb = Xyb::from(image.clone()); + + b.iter(|| xyb_to_planar(black_box(&xyb))) + }); +} + +// criterion_group!(benches, bench_ssimulacra2, bench_blur); +criterion_group!( + benches, + bench_ssimulacra2, + bench_blur, + bench_downscale_by_2, + bench_image_multiply, + bench_ssim_map, + bench_xyb_to_planar +); criterion_main!(benches); diff --git a/build.rs b/build.rs index 3fd6e2b..b96b20f 100644 --- a/build.rs +++ b/build.rs @@ -121,13 +121,13 @@ fn init_recursive_gaussian(out_path: &str) -> io::Result<()> { write_const_usize(&mut out_file, "RADIUS", radius as usize)?; - write_const_f32(&mut out_file, "VERT_MUL_IN_1", n2[0] as f32)?; - write_const_f32(&mut out_file, "VERT_MUL_IN_3", n2[1] as f32)?; - write_const_f32(&mut out_file, "VERT_MUL_IN_5", n2[2] as f32)?; + // write_const_f32(&mut out_file, "VERT_MUL_IN_1", n2[0] as f32)?; + // write_const_f32(&mut out_file, "VERT_MUL_IN_3", n2[1] as f32)?; + // write_const_f32(&mut out_file, "VERT_MUL_IN_5", n2[2] as f32)?; - write_const_f32(&mut out_file, "VERT_MUL_PREV_1", d1[0] as f32)?; - write_const_f32(&mut out_file, "VERT_MUL_PREV_3", d1[1] as f32)?; - write_const_f32(&mut out_file, "VERT_MUL_PREV_5", d1[2] as f32)?; + // write_const_f32(&mut out_file, "VERT_MUL_PREV_1", d1[0] as f32)?; + // write_const_f32(&mut out_file, "VERT_MUL_PREV_3", d1[1] as f32)?; + // write_const_f32(&mut out_file, "VERT_MUL_PREV_5", d1[2] as f32)?; write_const_f32(&mut out_file, "MUL_IN_1", mul_in[0])?; write_const_f32(&mut out_file, "MUL_IN_3", mul_in[4])?; diff --git a/src/blur/gaussian.rs b/src/blur/gaussian.rs deleted file mode 100644 index bd99067..0000000 --- a/src/blur/gaussian.rs +++ /dev/null @@ -1,186 +0,0 @@ -mod consts { - #![allow(clippy::unreadable_literal)] - include!(concat!(env!("OUT_DIR"), "/recursive_gaussian.rs")); -} - -/// Implements "Recursive Implementation of the Gaussian Filter Using Truncated -/// Cosine Functions" by Charalampidis [2016]. -pub struct RecursiveGaussian; - -impl RecursiveGaussian { - #[cfg(feature = "rayon")] - pub fn horizontal_pass(&self, input: &[f32], output: &mut [f32], width: usize) { - use rayon::iter::{IndexedParallelIterator, ParallelIterator}; - use rayon::prelude::ParallelSliceMut; - use rayon::slice::ParallelSlice; - - assert_eq!(input.len(), output.len()); - - input - .par_chunks_exact(width) - .zip(output.par_chunks_exact_mut(width)) - .for_each(|(input, output)| self.horizontal_row(input, output, width)); - } - - #[cfg(not(feature = "rayon"))] - pub fn horizontal_pass(&self, input: &[f32], output: &mut [f32], width: usize) { - assert_eq!(input.len(), output.len()); - - for (input, output) in input - .chunks_exact(width) - .zip(output.chunks_exact_mut(width)) - { - self.horizontal_row(input, output, width); - } - } - - fn horizontal_row(&self, input: &[f32], output: &mut [f32], width: usize) { - let big_n = consts::RADIUS as isize; - let mut prev_1 = 0f32; - let mut prev_3 = 0f32; - let mut prev_5 = 0f32; - let mut prev2_1 = 0f32; - let mut prev2_3 = 0f32; - let mut prev2_5 = 0f32; - - let mut n = (-big_n) + 1; - while n < width as isize { - let left = n - big_n - 1; - let right = n + big_n - 1; - let left_val = if left >= 0 { - // SAFETY: `left` can never be bigger than `width` - unsafe { *input.get_unchecked(left as usize) } - } else { - 0f32 - }; - let right_val = if right < width as isize { - // SAFETY: this branch ensures that `right` is not bigger than `width` - unsafe { *input.get_unchecked(right as usize) } - } else { - 0f32 - }; - let sum = left_val + right_val; - - let mut out_1 = sum * consts::MUL_IN_1; - let mut out_3 = sum * consts::MUL_IN_3; - let mut out_5 = sum * consts::MUL_IN_5; - - out_1 = consts::MUL_PREV2_1.mul_add(prev2_1, out_1); - out_3 = consts::MUL_PREV2_3.mul_add(prev2_3, out_3); - out_5 = consts::MUL_PREV2_5.mul_add(prev2_5, out_5); - prev2_1 = prev_1; - prev2_3 = prev_3; - prev2_5 = prev_5; - - out_1 = consts::MUL_PREV_1.mul_add(prev_1, out_1); - out_3 = consts::MUL_PREV_3.mul_add(prev_3, out_3); - out_5 = consts::MUL_PREV_5.mul_add(prev_5, out_5); - prev_1 = out_1; - prev_3 = out_3; - prev_5 = out_5; - - if n >= 0 { - // SAFETY: We know that this chunk of output is of size `width`, - // which `n` cannot be larger than. - unsafe { - *output.get_unchecked_mut(n as usize) = out_1 + out_3 + out_5; - } - } - - n += 1; - } - } - - pub fn vertical_pass_chunked( - &self, - input: &[f32], - output: &mut [f32], - width: usize, - height: usize, - ) { - assert!(J > K); - assert!(K > 0); - - assert_eq!(input.len(), output.len()); - - let mut x = 0; - while x + J <= width { - self.vertical_pass::(&input[x..], &mut output[x..], width, height); - x += J; - } - - while x + K <= width { - self.vertical_pass::(&input[x..], &mut output[x..], width, height); - x += K; - } - - while x < width { - self.vertical_pass::<1>(&input[x..], &mut output[x..], width, height); - x += 1; - } - } - - // Apply 1D vertical scan on COLUMNS elements at a time - pub fn vertical_pass( - &self, - input: &[f32], - output: &mut [f32], - width: usize, - height: usize, - ) { - assert_eq!(input.len(), output.len()); - - let big_n = consts::RADIUS as isize; - - let zeroes = vec![0f32; COLUMNS]; - let mut prev = vec![0f32; 3 * COLUMNS]; - let mut prev2 = vec![0f32; 3 * COLUMNS]; - let mut out = vec![0f32; 3 * COLUMNS]; - - let mut n = (-big_n) + 1; - while n < height as isize { - let top = n - big_n - 1; - let bottom = n + big_n - 1; - let top_row = if top >= 0 { - &input[top as usize * width..][..COLUMNS] - } else { - &zeroes - }; - - let bottom_row = if bottom < height as isize { - &input[bottom as usize * width..][..COLUMNS] - } else { - &zeroes - }; - - for i in 0..COLUMNS { - let sum = top_row[i] + bottom_row[i]; - - let i1 = i; - let i3 = i1 + COLUMNS; - let i5 = i3 + COLUMNS; - - let out1 = prev[i1].mul_add(consts::VERT_MUL_PREV_1, prev2[i1]); - let out3 = prev[i3].mul_add(consts::VERT_MUL_PREV_3, prev2[i3]); - let out5 = prev[i5].mul_add(consts::VERT_MUL_PREV_5, prev2[i5]); - - let out1 = sum.mul_add(consts::VERT_MUL_IN_1, -out1); - let out3 = sum.mul_add(consts::VERT_MUL_IN_3, -out3); - let out5 = sum.mul_add(consts::VERT_MUL_IN_5, -out5); - - out[i1] = out1; - out[i3] = out3; - out[i5] = out5; - - if n >= 0 { - output[n as usize * width + i] = out1 + out3 + out5; - } - } - - prev2.copy_from_slice(&prev); - prev.copy_from_slice(&out); - - n += 1; - } - } -} diff --git a/src/blur/gaussian_impl/gaussian.rs b/src/blur/gaussian_impl/gaussian.rs new file mode 100644 index 0000000..812c03a --- /dev/null +++ b/src/blur/gaussian_impl/gaussian.rs @@ -0,0 +1,150 @@ +mod consts { + #![allow(clippy::unreadable_literal)] + include!(concat!(env!("OUT_DIR"), "/recursive_gaussian.rs")); +} + +pub struct RecursiveGaussian; + +impl RecursiveGaussian { + #[inline(always)] + pub fn horizontal_pass(&self, input: &[f32], output: &mut [f32], width: usize) { + assert_eq!(input.len(), output.len()); + + #[cfg(feature = "rayon")] + { + use rayon::prelude::*; + input + .par_chunks_exact(width) + .zip(output.par_chunks_exact_mut(width)) + .for_each(|(input, output)| self.horizontal_row(input, output, width)); + } + + #[cfg(not(feature = "rayon"))] + { + input + .chunks_exact(width) + .zip(output.chunks_exact_mut(width)) + .for_each(|(input, output)| self.horizontal_row(input, output, width)); + } + } + + #[inline(always)] + fn horizontal_row(&self, input: &[f32], output: &mut [f32], width: usize) { + let big_n = consts::RADIUS as isize; + let [mul_in_1, mul_in_3, mul_in_5] = [consts::MUL_IN_1, consts::MUL_IN_3, consts::MUL_IN_5]; + let [mul_prev_1, mul_prev_3, mul_prev_5] = + [consts::MUL_PREV_1, consts::MUL_PREV_3, consts::MUL_PREV_5]; + let [mul_prev2_1, mul_prev2_3, mul_prev2_5] = [ + consts::MUL_PREV2_1, + consts::MUL_PREV2_3, + consts::MUL_PREV2_5, + ]; + + let mut prev = [0f32; 6]; // [prev_1, prev_3, prev_5, prev2_1, prev2_3, prev2_5] + + let mut n = (-big_n) + 1; + while n < width as isize { + let left = n - big_n - 1; + let right = n + big_n - 1; + + let left_val = if left >= 0 { + unsafe { *input.get_unchecked(left as usize) } + } else { + 0f32 + }; + let right_val = if right < width as isize { + unsafe { *input.get_unchecked(right as usize) } + } else { + 0f32 + }; + let sum = left_val + right_val; + + let mut out = [sum * mul_in_1, sum * mul_in_3, sum * mul_in_5]; + + out[0] = mul_prev2_1.mul_add(prev[3], out[0]); + out[1] = mul_prev2_3.mul_add(prev[4], out[1]); + out[2] = mul_prev2_5.mul_add(prev[5], out[2]); + + prev[3] = prev[0]; + prev[4] = prev[1]; + prev[5] = prev[2]; + + out[0] = mul_prev_1.mul_add(prev[0], out[0]); + out[1] = mul_prev_3.mul_add(prev[1], out[1]); + out[2] = mul_prev_5.mul_add(prev[2], out[2]); + + prev[0] = out[0]; + prev[1] = out[1]; + prev[2] = out[2]; + + if n >= 0 { + unsafe { + *output.get_unchecked_mut(n as usize) = out[0] + out[1] + out[2]; + } + } + + n += 1; + } + } + + #[inline(always)] + fn transpose(&self, input: &[f32], output: &mut [f32], width: usize, height: usize) { + assert_eq!(input.len(), width * height); + assert_eq!(output.len(), width * height); + + for y in 0..height { + for x in 0..width { + output[x * height + y] = input[y * width + x]; + } + } + } + + #[inline(always)] + pub fn vertical_pass( + &self, + input: &[f32], + output: &mut [f32], + width: usize, + height: usize, + transposed_input: &mut Vec, + transposed_output: &mut Vec, + ) { + let size = width * height; + assert_eq!(input.len(), size); + assert_eq!(output.len(), size); + + // Transpose the input data to make it easier to process in horizontal rows + self.transpose(input, transposed_input, width, height); + + #[cfg(feature = "rayon")] + { + use rayon::prelude::*; + + //Use par_chunks_exact_mut to divide the output buffer and calculate the corresponding input chunk for each chunk + let chunk_size = height; + transposed_output + .par_chunks_exact_mut(chunk_size) + .enumerate() + .for_each(|(index, output_chunk)| { + let start = index * chunk_size; + let input_chunk = &transposed_input[start..start + chunk_size]; + self.horizontal_row(input_chunk, output_chunk, height); + }); + } + + #[cfg(not(feature = "rayon"))] + { + for y in 0..width { + let start = y * height; + self.horizontal_row( + &transposed_input[start..start + height], + &mut transposed_output[start..start + height], + height, + ); + } + } + + // Transpose the output data back to the original format + self.transpose(&transposed_output, output, height, width); + } +} diff --git a/src/blur/gaussian_impl/mod.rs b/src/blur/gaussian_impl/mod.rs new file mode 100644 index 0000000..df148c6 --- /dev/null +++ b/src/blur/gaussian_impl/mod.rs @@ -0,0 +1,65 @@ +mod gaussian; +use super::{BlurOperator, Ssimulacra2Error}; + +pub struct GaussianBlur { + width: usize, + height: usize, + temp_buffer: Option>, + transposed_input: Option>, + transposed_output: Option>, +} + +impl BlurOperator for GaussianBlur { + fn new(width: usize, height: usize) -> Self { + let size = width * height; + GaussianBlur { + width, + height, + temp_buffer: Some(vec![0.0f32; size]), + transposed_input: Some(vec![0.0f32; size]), + transposed_output: Some(vec![0.0f32; size]), + } + } + + fn shrink_to(&mut self, width: usize, height: usize) { + self.width = width; + self.height = height; + + if let Some(buffer) = &mut self.temp_buffer { + buffer.resize(width * height, 0.0); + } + + if let Some(buffer) = &mut self.transposed_input { + buffer.resize(width * height, 0.0); + } + + if let Some(buffer) = &mut self.transposed_output { + buffer.resize(width * height, 0.0); + } + } + + fn blur( + &mut self, + img: &[Vec; 3], + out: &mut [Vec; 3], + ) -> Result<(), Ssimulacra2Error> { + let mut temp = self.temp_buffer.take().unwrap(); + let kernel = gaussian::RecursiveGaussian; + + for i in 0..3 { + kernel.horizontal_pass(&img[i], &mut temp, self.width); + kernel.vertical_pass( + &temp, + &mut out[i], + self.width, + self.height, + self.transposed_input.as_mut().unwrap(), + self.transposed_output.as_mut().unwrap(), + ); + } + + self.temp_buffer = Some(temp); + + Ok(()) + } +} diff --git a/src/blur/libblur_impl/mod.rs b/src/blur/libblur_impl/mod.rs new file mode 100644 index 0000000..5fa2169 --- /dev/null +++ b/src/blur/libblur_impl/mod.rs @@ -0,0 +1,84 @@ +use super::{BlurOperator, Ssimulacra2Error}; +use libblur::{BlurImage, BlurImageMut, EdgeMode, FastBlurChannels, ThreadingPolicy}; +use std::borrow::Cow; + +pub struct LibBlur { + width: usize, + height: usize, +} + +impl BlurOperator for LibBlur { + fn new(width: usize, height: usize) -> Self { + LibBlur { width, height } + } + + fn shrink_to(&mut self, width: usize, height: usize) { + self.width = width; + self.height = height; + } + + fn blur( + &mut self, + img: &[Vec; 3], + out: &mut [Vec; 3], + ) -> Result<(), Ssimulacra2Error> { + self.blur_plane(&img[0], &mut out[0])?; + self.blur_plane(&img[1], &mut out[1])?; + self.blur_plane(&img[2], &mut out[2])?; + Ok(()) + } +} + +impl LibBlur { + fn blur_plane(&mut self, plane: &[f32], out: &mut [f32]) -> Result<(), Ssimulacra2Error> { + //Set kernel size and sigma value - adjust as needed but not recommended to change + const KERNEL_SIZE: u32 = 11; + const SIGMA: f32 = 2.2943; // diff 0.000 / 0.932 + + // BlurImage creation + let src_image = BlurImage { + data: Cow::Borrowed(plane), + width: self.width as u32, + height: self.height as u32, + stride: self.width as u32, // stride == width + channels: FastBlurChannels::Plane, + }; + + // BlurImageMut creation + let mut dst_image = BlurImageMut::borrow( + out, + self.width as u32, + self.height as u32, + FastBlurChannels::Plane, + ); + + // gaussian_blur_f32 call + #[cfg(feature = "rayon")] + if let Err(e) = libblur::gaussian_blur_f32( + &src_image, + &mut dst_image, + KERNEL_SIZE, + SIGMA, + EdgeMode::Clamp, + ThreadingPolicy::Adaptive, // use rayon + ) { + eprintln!("Error in gaussian_blur_f32: {:?}", e); + return Err(Ssimulacra2Error::GaussianBlurError); + } + + #[cfg(not(feature = "rayon"))] + if let Err(e) = libblur::gaussian_blur_f32( + &src_image, + &mut dst_image, + KERNEL_SIZE, + SIGMA, + EdgeMode::Clamp, + ThreadingPolicy::Single, // no rayon + ) { + eprintln!("Error in gaussian_blur_f32: {:?}", e); + return Err(Ssimulacra2Error::GaussianBlurError); + } + + Ok(()) + } +} diff --git a/src/blur/mod.rs b/src/blur/mod.rs index cadca76..f68b0e4 100644 --- a/src/blur/mod.rs +++ b/src/blur/mod.rs @@ -1,60 +1,32 @@ -mod gaussian; +#[cfg(not(feature = "libblur"))] +mod gaussian_impl; +#[cfg(feature = "libblur")] +mod libblur_impl; +use crate::Ssimulacra2Error; -use gaussian::RecursiveGaussian; - -/// Structure handling image blur. +/// Trait handling image blur. /// -/// This struct contains the necessary buffers and the kernel used for blurring +/// This trait contains the necessary buffers and the kernel used for blurring /// (currently a recursive approximation of the Gaussian filter). /// /// Note that the width and height of the image passed to [blur][Self::blur] needs to exactly /// match the width and height of this instance. If you reduce the image size (e.g. via /// downscaling), [`shrink_to`][Self::shrink_to] can be used to resize the internal buffers. -pub struct Blur { - kernel: RecursiveGaussian, - temp: Vec, - width: usize, - height: usize, -} - -impl Blur { +pub trait BlurOperator { /// Create a new [Blur] for images of the given width and height. - /// This pre-allocates the necessary buffers. - #[must_use] - pub fn new(width: usize, height: usize) -> Self { - Blur { - kernel: RecursiveGaussian, - temp: vec![0.0f32; width * height], - width, - height, - } - } - + fn new(width: usize, height: usize) -> Self; /// Truncates the internal buffers to fit images of the given width and height. - /// - /// This will [truncate][Vec::truncate] the internal buffers - /// without affecting the allocated memory. - pub fn shrink_to(&mut self, width: usize, height: usize) { - self.temp.truncate(width * height); - self.width = width; - self.height = height; - } + fn shrink_to(&mut self, width: usize, height: usize); + /// Blur the given image using libblur's gaussian_blur_f32. + fn blur( + &mut self, + img: &[Vec; 3], + out: &mut [Vec; 3], + ) -> Result<(), Ssimulacra2Error>; +} - /// Blur the given image. - pub fn blur(&mut self, img: &[Vec; 3]) -> [Vec; 3] { - [ - self.blur_plane(&img[0]), - self.blur_plane(&img[1]), - self.blur_plane(&img[2]), - ] - } +#[cfg(not(feature = "libblur"))] +pub use gaussian_impl::GaussianBlur as Blur; - fn blur_plane(&mut self, plane: &[f32]) -> Vec { - let mut out = vec![0f32; self.width * self.height]; - self.kernel - .horizontal_pass(plane, &mut self.temp, self.width); - self.kernel - .vertical_pass_chunked::<128, 32>(&self.temp, &mut out, self.width, self.height); - out - } -} +#[cfg(feature = "libblur")] +pub use libblur_impl::LibBlur as Blur; diff --git a/src/lib.rs b/src/lib.rs index 62b4026..a0f0df9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,6 @@ mod blur; -pub use blur::Blur; +pub use blur::{Blur, BlurOperator}; pub use yuvxyb::{CastFromPrimitive, Frame, LinearRgb, Pixel, Plane, Rgb, Xyb, Yuv}; pub use yuvxyb::{ColorPrimaries, MatrixCoefficients, TransferCharacteristic, YuvConfig}; @@ -25,6 +25,12 @@ pub enum Ssimulacra2Error { /// This is not currently supported by the SSIMULACRA2 metric. #[error("Images must be at least 8x8 pixels")] InvalidImageSize, + + #[error("Error in gaussian_blur_f32")] + GaussianBlurError, + + #[error("Error in fast_image_resize")] + ResizeError, } /// Computes the SSIMULACRA2 score for a given input frame and the distorted @@ -34,6 +40,7 @@ pub enum Ssimulacra2Error { /// - If the source and distorted image width and height do not match /// - If the source or distorted image cannot be converted to XYB successfully /// - If the image is smaller than 8x8 pixels +#[inline(always)] pub fn compute_frame_ssimulacra2(source: T, distorted: U) -> Result where LinearRgb: TryFrom + TryFrom, @@ -57,11 +64,39 @@ where let mut width = img1.width(); let mut height = img1.height(); + let initial_size = width * height; let mut mul = [ - vec![0.0f32; width * height], - vec![0.0f32; width * height], - vec![0.0f32; width * height], + vec![0.0f32; initial_size], + vec![0.0f32; initial_size], + vec![0.0f32; initial_size], + ]; + + let mut sigma1_sq = [ + vec![0.0f32; initial_size], + vec![0.0f32; initial_size], + vec![0.0f32; initial_size], + ]; + let mut sigma2_sq = [ + vec![0.0f32; initial_size], + vec![0.0f32; initial_size], + vec![0.0f32; initial_size], + ]; + let mut sigma12 = [ + vec![0.0f32; initial_size], + vec![0.0f32; initial_size], + vec![0.0f32; initial_size], + ]; + let mut mu1 = [ + vec![0.0f32; initial_size], + vec![0.0f32; initial_size], + vec![0.0f32; initial_size], ]; + let mut mu2 = [ + vec![0.0f32; initial_size], + vec![0.0f32; initial_size], + vec![0.0f32; initial_size], + ]; + let mut blur = Blur::new(width, height); let mut msssim = Msssim::default(); @@ -75,37 +110,55 @@ where img2 = downscale_by_2(&img2); width = img1.width(); height = img2.height(); + + for c in &mut mul { + c.truncate(width * height); + } + for c in &mut sigma1_sq { + c.truncate(width * height); + } + for c in &mut sigma2_sq { + c.truncate(width * height); + } + for c in &mut sigma12 { + c.truncate(width * height); + } + for c in &mut mu1 { + c.truncate(width * height); + } + for c in &mut mu2 { + c.truncate(width * height); + } + + blur.shrink_to(width, height); } - for c in &mut mul { - c.truncate(width * height); - } - blur.shrink_to(width, height); - let mut img1 = Xyb::from(img1.clone()); - let mut img2 = Xyb::from(img2.clone()); + // "Convert to XYB format" + let mut img1_xyb = Xyb::from(img1.clone()); + let mut img2_xyb = Xyb::from(img2.clone()); - make_positive_xyb(&mut img1); - make_positive_xyb(&mut img2); + make_positive_xyb(&mut img1_xyb); + make_positive_xyb(&mut img2_xyb); - // SSIMULACRA2 works with the data in a planar format, - // so we need to convert to that. - let img1 = xyb_to_planar(&img1); - let img2 = xyb_to_planar(&img2); + // "Convert to planar format" + let img1_planar = xyb_to_planar(&img1_xyb); + let img2_planar = xyb_to_planar(&img2_xyb); - image_multiply(&img1, &img1, &mut mul); - let sigma1_sq = blur.blur(&mul); + image_multiply(&img1_planar, &img1_planar, &mut mul); + blur.blur(&mul, &mut sigma1_sq)?; - image_multiply(&img2, &img2, &mut mul); - let sigma2_sq = blur.blur(&mul); + image_multiply(&img2_planar, &img2_planar, &mut mul); + blur.blur(&mul, &mut sigma2_sq)?; - image_multiply(&img1, &img2, &mut mul); - let sigma12 = blur.blur(&mul); + image_multiply(&img1_planar, &img2_planar, &mut mul); + blur.blur(&mul, &mut sigma12)?; - let mu1 = blur.blur(&img1); - let mu2 = blur.blur(&img2); + blur.blur(&img1_planar, &mut mu1)?; + blur.blur(&img2_planar, &mut mu2)?; let avg_ssim = ssim_map(width, height, &mu1, &mu2, &sigma1_sq, &sigma2_sq, &sigma12); - let avg_edgediff = edge_diff_map(width, height, &img1, &mu1, &img2, &mu2); + let avg_edgediff = edge_diff_map(width, height, &img1_planar, &mu1, &img2_planar, &mu2); + msssim.scales.push(MsssimScale { avg_ssim, avg_edgediff, @@ -126,6 +179,7 @@ where // B: 0.272295..0.938012 // The maximum pixel-wise difference has to be <= 1 for the ssim formula to make // sense. +#[inline(always)] fn make_positive_xyb(xyb: &mut Xyb) { for pix in xyb.data_mut().iter_mut() { pix[2] = (pix[2] - pix[1]) + 0.55; @@ -134,7 +188,8 @@ fn make_positive_xyb(xyb: &mut Xyb) { } } -fn xyb_to_planar(xyb: &Xyb) -> [Vec; 3] { +#[inline(always)] +pub fn xyb_to_planar(xyb: &Xyb) -> [Vec; 3] { let mut out1 = vec![0.0f32; xyb.width() * xyb.height()]; let mut out2 = vec![0.0f32; xyb.width() * xyb.height()]; let mut out3 = vec![0.0f32; xyb.width() * xyb.height()]; @@ -154,7 +209,9 @@ fn xyb_to_planar(xyb: &Xyb) -> [Vec; 3] { [out1, out2, out3] } -fn image_multiply(img1: &[Vec; 3], img2: &[Vec; 3], out: &mut [Vec; 3]) { +#[inline(always)] +// 2.3625ms +pub fn image_multiply(img1: &[Vec; 3], img2: &[Vec; 3], out: &mut [Vec; 3]) { for ((plane1, plane2), out_plane) in img1.iter().zip(img2.iter()).zip(out.iter_mut()) { for ((&p1, &p2), o) in plane1.iter().zip(plane2.iter()).zip(out_plane.iter_mut()) { *o = p1 * p2; @@ -162,7 +219,9 @@ fn image_multiply(img1: &[Vec; 3], img2: &[Vec; 3], out: &mut [Vec LinearRgb { +#[inline(always)] +/// 1.7866ms +pub fn downscale_by_2(in_data: &LinearRgb) -> LinearRgb { const SCALE: usize = 2; let in_w = in_data.width(); let in_h = in_data.height(); @@ -170,23 +229,69 @@ fn downscale_by_2(in_data: &LinearRgb) -> LinearRgb { let out_h = (in_h + SCALE - 1) / SCALE; let mut out_data = vec![[0.0f32; 3]; out_w * out_h]; let normalize = 1f32 / (SCALE * SCALE) as f32; + let in_data = in_data.data(); + + // "Process the inner area without boundaries first" + let safe_h = in_h - (in_h % SCALE); + let safe_w = in_w - (in_w % SCALE); + + for oy in 0..(safe_h / SCALE) { + let y_base = oy * SCALE; + for ox in 0..(safe_w / SCALE) { + let x_base = ox * SCALE; + let out_idx = oy * out_w + ox; + + // "Unroll the loop to access all pixels directly" + let p00 = in_data[y_base * in_w + x_base]; + let p01 = in_data[y_base * in_w + (x_base + 1)]; + let p10 = in_data[(y_base + 1) * in_w + x_base]; + let p11 = in_data[(y_base + 1) * in_w + (x_base + 1)]; + + out_data[out_idx][0] = (p00[0] + p01[0] + p10[0] + p11[0]) * normalize; + out_data[out_idx][1] = (p00[1] + p01[1] + p10[1] + p11[1]) * normalize; + out_data[out_idx][2] = (p00[2] + p01[2] + p10[2] + p11[2]) * normalize; + } + } - let in_data = &in_data.data(); - for oy in 0..out_h { - for ox in 0..out_w { - for c in 0..3 { - let mut sum = 0f32; - for iy in 0..SCALE { - for ix in 0..SCALE { - let x = (ox * SCALE + ix).min(in_w - 1); - let y = (oy * SCALE + iy).min(in_h - 1); - let in_pix = in_data[y * in_w + x]; - - sum += in_pix[c]; + if safe_w < in_w || safe_h < in_h { + for oy in 0..out_h { + let y_start = oy * SCALE; + for ox in 0..out_w { + if oy < safe_h / SCALE && ox < safe_w / SCALE { + continue; + } + + let x_start = ox * SCALE; + let out_idx = oy * out_w + ox; + let mut r = 0.0; + let mut g = 0.0; + let mut b = 0.0; + let mut count = 0; + + for dy in 0..SCALE { + let y = y_start + dy; + if y >= in_h { + continue; + } + + for dx in 0..SCALE { + let x = x_start + dx; + if x >= in_w { + continue; + } + + let pixel = in_data[y * in_w + x]; + r += pixel[0]; + g += pixel[1]; + b += pixel[2]; + count += 1; } } - let out_pix = &mut out_data[oy * out_w + ox]; - out_pix[c] = sum * normalize; + + let scale = if count > 0 { 1.0 / count as f32 } else { 0.0 }; + out_data[out_idx][0] = r * scale; + out_data[out_idx][1] = g * scale; + out_data[out_idx][2] = b * scale; } } } @@ -194,7 +299,9 @@ fn downscale_by_2(in_data: &LinearRgb) -> LinearRgb { LinearRgb::new(out_data, out_w, out_h).expect("Resolution and data size match") } -fn ssim_map( +#[inline(always)] +// 4.5360ms +pub fn ssim_map( width: usize, height: usize, m1: &[Vec; 3], @@ -203,56 +310,93 @@ fn ssim_map( s22: &[Vec; 3], s12: &[Vec; 3], ) -> [f64; 3 * 2] { - const C2: f32 = 0.0009f32; - let one_per_pixels = 1.0f64 / (width * height) as f64; let mut plane_averages = [0f64; 3 * 2]; - for c in 0..3 { - let mut sum1 = [0.0f64; 2]; - for (row_m1, (row_m2, (row_s11, (row_s22, row_s12)))) in m1[c].chunks_exact(width).zip( - m2[c].chunks_exact(width).zip( - s11[c] - .chunks_exact(width) - .zip(s22[c].chunks_exact(width).zip(s12[c].chunks_exact(width))), - ), - ) { - for x in 0..width { - let mu1 = row_m1[x]; - let mu2 = row_m2[x]; - let mu11 = mu1 * mu1; - let mu22 = mu2 * mu2; - let mu12 = mu1 * mu2; - let mu_diff = mu1 - mu2; - - // Correction applied compared to the original SSIM formula, which has: - // luma_err = 2 * mu1 * mu2 / (mu1^2 + mu2^2) - // = 1 - (mu1 - mu2)^2 / (mu1^2 + mu2^2) - // The denominator causes error in the darks (low mu1 and mu2) to weigh - // more than error in the brights (high mu1 and mu2). This would make - // sense if values correspond to linear luma. However, the actual values - // are either gamma-compressed luma (which supposedly is already - // perceptually uniform) or chroma (where weighing green more than red - // or blue more than yellow does not make any sense at all). So it is - // better to simply drop this denominator. - let num_m = mu_diff.mul_add(-mu_diff, 1.0f32); - let num_s = 2f32.mul_add(row_s12[x] - mu12, C2); - let denom_s = (row_s11[x] - mu11) + (row_s22[x] - mu22) + C2; - // Use 1 - SSIM' so it becomes an error score instead of a quality - // index. This makes it make sense to compute an L_4 norm. - let mut d = 1.0f64 - f64::from((num_m * num_s) / denom_s); - d = d.max(0.0); - sum1[0] += d; - sum1[1] += d.powi(4); - } + #[cfg(feature = "rayon")] + { + use rayon::prelude::*; + let results: Vec<(usize, [f64; 2])> = (0..3) + .into_par_iter() + .map(|c| { + let sums = compute_channel_sums(width, c, m1, m2, s11, s22, s12); + (c, sums) + }) + .collect(); + + for (c, sums) in results { + plane_averages[c * 2] = one_per_pixels * sums[0]; + plane_averages[c * 2 + 1] = (one_per_pixels * sums[1]).sqrt().sqrt(); + } + } + #[cfg(not(feature = "rayon"))] + { + for c in 0..3 { + let sums = compute_channel_sums(width, c, m1, m2, s11, s22, s12); + plane_averages[c * 2] = one_per_pixels * sums[0]; + plane_averages[c * 2 + 1] = (one_per_pixels * sums[1]).sqrt().sqrt(); } - plane_averages[c * 2] = one_per_pixels * sum1[0]; - plane_averages[c * 2 + 1] = (one_per_pixels * sum1[1]).sqrt().sqrt(); } plane_averages } +#[inline(always)] +fn compute_channel_sums( + width: usize, + c: usize, + m1: &[Vec; 3], + m2: &[Vec; 3], + s11: &[Vec; 3], + s22: &[Vec; 3], + s12: &[Vec; 3], +) -> [f64; 2] { + const C2: f32 = 0.0009f32; + let mut sums = [0.0f64; 2]; + + for (row_idx, (row_m1, row_m2)) in m1[c] + .chunks_exact(width) + .zip(m2[c].chunks_exact(width)) + .enumerate() + { + let row_offset = row_idx * width; + for x in 0..width { + let mu1 = row_m1[x]; + let mu2 = row_m2[x]; + let mu_diff = mu1 - mu2; + + // Correction applied compared to the original SSIM formula, which has: + // luma_err = 2 * mu1 * mu2 / (mu1^2 + mu2^2) + // = 1 - (mu1 - mu2)^2 / (mu1^2 + mu2^2) + // The denominator causes error in the darks (low mu1 and mu2) to weigh + // more than error in the brights (high mu1 and mu2). This would make + // sense if values correspond to linear luma. However, the actual values + // are either gamma-compressed luma (which supposedly is already + // perceptually uniform) or chroma (where weighing green more than red + // or blue more than yellow does not make any sense at all). So it is + // better to simply drop this denominator. + let num_m = mu_diff.mul_add(-mu_diff, 1.0); + let mu12 = mu1 * mu2; + let sigma12 = s12[c][row_offset + x] - mu12; + let num_s = 2.0f32.mul_add(sigma12, C2); + let sigma1_sq = s11[c][row_offset + x] - mu1 * mu1; + let sigma2_sq = s22[c][row_offset + x] - mu2 * mu2; + let denom_s = sigma1_sq + sigma2_sq + C2; + + let ssim = (num_m * num_s) / denom_s; + let err = (1.0 - f64::from(ssim)).max(0.0); + + sums[0] += err; + let err_sq = err * err; + sums[1] += err_sq * err_sq; + } + } + + sums +} + +#[inline(always)] +// 12.492ms ssimulacra2 bench fn edge_diff_map( width: usize, height: usize, @@ -265,34 +409,46 @@ fn edge_diff_map( let mut plane_averages = [0f64; 3 * 4]; for c in 0..3 { - let mut sum1 = [0.0f64; 4]; + // cache + let mut artifact_sum = 0.0f64; + let mut artifact_pow4_sum = 0.0f64; + let mut detail_lost_sum = 0.0f64; + let mut detail_lost_pow4_sum = 0.0f64; + for (row1, (row2, (rowm1, rowm2))) in img1[c].chunks_exact(width).zip( img2[c] .chunks_exact(width) .zip(mu1[c].chunks_exact(width).zip(mu2[c].chunks_exact(width))), ) { for x in 0..width { - let d1: f64 = (1.0 + f64::from((row2[x] - rowm2[x]).abs())) - / (1.0 + f64::from((row1[x] - rowm1[x]).abs())) - - 1.0; + let diff1 = f64::from((row1[x] - rowm1[x]).abs()); + let diff2 = f64::from((row2[x] - rowm2[x]).abs()); + + let d1 = (1.0 + diff2) / (1.0 + diff1) - 1.0; // d1 > 0: distorted has an edge where original is smooth // (indicating ringing, color banding, blockiness, etc) - let artifact = d1.max(0.0); - sum1[0] += artifact; - sum1[1] += artifact.powi(4); + let artifact = 0.5 * (d1 + d1.abs()); // if d1 is positive, d1; if negative, 0 + artifact_sum += artifact; + + let artifact_squared = artifact * artifact; + artifact_pow4_sum += artifact_squared * artifact_squared; // d1 < 0: original has an edge where distorted is smooth // (indicating smoothing, blurring, smearing, etc) - let detail_lost = (-d1).max(0.0); - sum1[2] += detail_lost; - sum1[3] += detail_lost.powi(4); + let detail_lost = 0.5 * (-d1 + d1.abs()); // if d1 is negative, -d1; if positive, 0 + detail_lost_sum += detail_lost; + let detail_lost_squared = detail_lost * detail_lost; + detail_lost_pow4_sum += detail_lost_squared * detail_lost_squared; } } - plane_averages[c * 4] = one_per_pixels * sum1[0]; - plane_averages[c * 4 + 1] = (one_per_pixels * sum1[1]).sqrt().sqrt(); - plane_averages[c * 4 + 2] = one_per_pixels * sum1[2]; - plane_averages[c * 4 + 3] = (one_per_pixels * sum1[3]).sqrt().sqrt(); + + // Calculate the averages for the current channel + let base_idx = c * 4; + plane_averages[base_idx] = one_per_pixels * artifact_sum; + plane_averages[base_idx + 1] = (one_per_pixels * artifact_pow4_sum).sqrt().sqrt(); + plane_averages[base_idx + 2] = one_per_pixels * detail_lost_sum; + plane_averages[base_idx + 3] = (one_per_pixels * detail_lost_pow4_sum).sqrt().sqrt(); } plane_averages @@ -341,6 +497,7 @@ impl Msssim { // KADID-10k: 0.6175 | 0.8133 | 0.8030 // KonFiG(F): 0.7668 | 0.9194 | 0.9136 #[allow(clippy::too_many_lines)] + #[inline(always)] pub fn score(&self) -> f64 { const WEIGHT: [f64; 108] = [ 0.0, @@ -489,7 +646,7 @@ impl Msssim { #[cfg(test)] mod tests { - use std::path::PathBuf; + use std::{path::PathBuf, time::Instant}; use super::*; use yuvxyb::Rgb; @@ -540,8 +697,15 @@ mod tests { .unwrap(), ) .unwrap(); + let start = Instant::now(); let result = compute_frame_ssimulacra2(source_data, distorted_data).unwrap(); + let elapsed = start.elapsed(); + println!("Elapsed time: {:?}", elapsed); let expected = 17.398_505_f64; + + println!("Result: {result:.6}"); + println!("Expected: {expected:.6}"); + println!("Diff: {:.3}", (result - expected).abs()); assert!( // SOMETHING is WEIRD with Github CI where it gives different results across DIFFERENT // RUNS @@ -549,4 +713,67 @@ mod tests { "Result {result:.6} not equal to expected {expected:.6}", ); } + + #[test] + fn test2_ssimulacra2() { + let source = image::open( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("test_data") + .join("test_image_4.png"), + ) + .unwrap(); + let distorted = image::open( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("test_data") + .join("test_image_4.jpg"), + ) + .unwrap(); + + let source_data = source + .to_rgb32f() + .chunks_exact(3) + .map(|chunk| [chunk[0], chunk[1], chunk[2]]) + .collect::>(); + let source_data = Xyb::try_from( + Rgb::new( + source_data, + source.width() as usize, + source.height() as usize, + TransferCharacteristic::SRGB, + ColorPrimaries::BT709, + ) + .unwrap(), + ) + .unwrap(); + let distorted_data = distorted + .to_rgb32f() + .chunks_exact(3) + .map(|chunk| [chunk[0], chunk[1], chunk[2]]) + .collect::>(); + let distorted_data = Xyb::try_from( + Rgb::new( + distorted_data, + distorted.width() as usize, + distorted.height() as usize, + TransferCharacteristic::SRGB, + ColorPrimaries::BT709, + ) + .unwrap(), + ) + .unwrap(); + let start = Instant::now(); + let result = compute_frame_ssimulacra2(source_data, distorted_data).unwrap(); + let elapsed = start.elapsed(); + println!("Elapsed time: {:?}", elapsed); + let expected = 84.031_931_f64; + println!("Result: {result:.6}"); + println!("Expected: {expected:.6}"); + println!("Diff: {:.3}", (result - expected).abs()); + assert!( + // SOMETHING is WEIRD with Github CI where it gives different results across DIFFERENT + // RUNS + (result - expected).abs() < 1.0f64, + "Result {result:.6} not equal to expected {expected:.6}", + ); + } } diff --git a/test_data/test_image_4.jpg b/test_data/test_image_4.jpg new file mode 100644 index 0000000..a9c3e14 Binary files /dev/null and b/test_data/test_image_4.jpg differ diff --git a/test_data/test_image_4.png b/test_data/test_image_4.png new file mode 100644 index 0000000..3bb0764 Binary files /dev/null and b/test_data/test_image_4.png differ