Skip to content

Commit d555b7e

Browse files
committed
perf: improvements to scenecut detection speed
- Actually enable SIMD for scenecut detection. Oops.
- Always use the VapoursynthDecoder instead of piping, to reduce memory bandwidth overhead.
- Use the default cachepath behavior (place the cache file next to the video). This avoids needing to re-cache if av1an's input hash changes and, for some reason, is just faster.
1 parent 1ac0b9d commit d555b7e

File tree

7 files changed

+47
-112
lines changed

7 files changed

+47
-112
lines changed

Cargo.lock

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

av1an-core/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ av-decoders = { version = "0.4.0", features = ["vapoursynth"] }
2222
av-format = "0.7.0"
2323
av-ivf = "0.5.0"
2424
av-scenechange = { version = "0.17.3", default-features = false, features = [
25+
"asm",
2526
"vapoursynth",
2627
] }
2728
av1-grain = { version = "0.2.4", default-features = false, features = [
@@ -160,7 +161,6 @@ ref_option_ref = "warn"
160161
# Disabled due to https://github.com/rust-lang/rust-clippy/issues/14697
161162
# self_named_module_files = "warn"
162163
semicolon_if_nothing_returned = "warn"
163-
string_to_string = "warn"
164164
tests_outside_test_module = "warn"
165165
transmute_ptr_to_ptr = "warn"
166166
unused_peekable = "warn"

av1an-core/src/context.rs

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -922,9 +922,14 @@ impl Av1anContext {
922922
passes: overrides.as_ref().map_or(self.args.passes, |ovr| ovr.passes),
923923
encoder: overrides.as_ref().map_or(self.args.encoder, |ovr| ovr.encoder),
924924
noise_size: self.args.photon_noise_size,
925-
target_quality: overrides.as_ref().map_or(self.args.target_quality.clone(), |ovr| {
926-
ovr.target_quality.clone().map_or(self.args.target_quality.clone(), |tq| tq)
927-
}),
925+
target_quality: overrides.as_ref().map_or_else(
926+
|| self.args.target_quality.clone(),
927+
|ovr| {
928+
ovr.target_quality
929+
.clone()
930+
.map_or_else(|| self.args.target_quality.clone(), |tq| tq)
931+
},
932+
),
928933
tq_cq: None,
929934
ignore_frame_mismatch: self.args.ignore_frame_mismatch,
930935
};
@@ -1038,8 +1043,8 @@ impl Av1anContext {
10381043
noise_size: scene.zone_overrides.as_ref().map_or(self.args.photon_noise_size, |ovr| {
10391044
(ovr.photon_noise_width, ovr.photon_noise_height)
10401045
}),
1041-
target_quality: scene.zone_overrides.as_ref().map_or(
1042-
self.args.target_quality.clone(),
1046+
target_quality: scene.zone_overrides.as_ref().map_or_else(
1047+
|| self.args.target_quality.clone(),
10431048
|ovr| {
10441049
ovr.target_quality.clone().unwrap_or_else(|| self.args.target_quality.clone())
10451050
},
@@ -1275,9 +1280,14 @@ impl Av1anContext {
12751280
passes: overrides.as_ref().map_or(self.args.passes, |ovr| ovr.passes),
12761281
encoder: overrides.as_ref().map_or(self.args.encoder, |ovr| ovr.encoder),
12771282
noise_size: self.args.photon_noise_size,
1278-
target_quality: overrides.as_ref().map_or(self.args.target_quality.clone(), |ovr| {
1279-
ovr.target_quality.clone().map_or(self.args.target_quality.clone(), |tq| tq)
1280-
}),
1283+
target_quality: overrides.as_ref().map_or_else(
1284+
|| self.args.target_quality.clone(),
1285+
|ovr| {
1286+
ovr.target_quality
1287+
.clone()
1288+
.map_or_else(|| self.args.target_quality.clone(), |tq| tq)
1289+
},
1290+
),
12811291
tq_cq: None,
12821292
ignore_frame_mismatch: self.args.ignore_frame_mismatch,
12831293
};

av1an-core/src/loadscript.vpy

Lines changed: 11 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,9 @@
11
import os
2-
import vapoursynth
32
from vapoursynth import core
43

5-
# Set cache size to 1GB
6-
core.max_cache_size = 1024
7-
84
source = os.environ.get("AV1AN_SOURCE", None)
95
chunk_method = os.environ.get("AV1AN_CHUNK_METHOD", None)
10-
cache_file = os.environ.get("AV1AN_CACHE_FILE", None)
11-
12-
# Scene Detection
13-
perform_scene_detection = os.environ.get("AV1AN_PERFORM_SCENE_DETECTION", None)
14-
downscale_height = os.environ.get("AV1AN_DOWNSCALE_HEIGHT", None)
15-
pixel_format = os.environ.get("AV1AN_PIXEL_FORMAT", None)
16-
scaler = os.environ.get("AV1AN_SCALER", None)
6+
perform_scene_detection = globals().get("AV1AN_PERFORM_SCENE_DETECTION", None)
177

188
# Default valid chunk methods
199
VALID_CHUNK_METHODS: list[str] = ["lsmash", "ffms2", "dgdecnv", "bestsource"]
@@ -33,61 +23,22 @@ if not os.path.exists(source):
3323
# Import video
3424
match (chunk_method): # type: ignore
3525
case "lsmash":
36-
video = core.lsmas.LWLibavSource(source, cachefile=cache_file)
26+
video = core.lsmas.LWLibavSource(source)
3727
case "ffms2":
38-
video = core.ffms2.Source(source, cachefile=cache_file)
28+
video = core.ffms2.Source(source)
3929
case "dgdecnv":
4030
video = core.dgdecodenv.DGSource(source)
4131
case "bestsource":
42-
# Different versions of BestSource have different behaviors on Windows
43-
# Versions R1 and older support absolute paths just as Av1an originally expected (with .json extension)
44-
# Versions R8 and newer support absolute paths for cache files, but require setting cachemode to 4
45-
# Versions since ~R2 attempt to create a path stemming from CWD but using the path of the source and also appends the track index and a .bsindex extension, which is unexpected for Av1an
46-
# Unfortunately, BestSource is not keeping the reported version number updated properly so we cannot reliably determine if it supports absolute paths or not
47-
# At best, we can wrap an attempt in a try/except block as previous versions of BestSource should throw an exception if an invalid cachemode value is provided
48-
try:
49-
video = core.bs.VideoSource(source, cachepath=cache_file, cachemode=4)
50-
except Exception:
51-
# Installed BestSource version does not support absolute paths, fallback to default behavior
52-
video = core.bs.VideoSource(source, cachepath=cache_file)
53-
54-
# Scene Detection
55-
# Bicubic is based on FFmpeg defaults. See https://ffmpeg.org/ffmpeg-scaler.html#toc-Scaler-Options
56-
scaler_function = core.resize.Bicubic
57-
# Map scaler parameter to equivalent vapoursynth scaler. See https://www.vapoursynth.com/doc/functions/video/resize.html#resize
58-
if scaler is not None:
59-
match (scaler.lower()):
60-
case "fast_bilinear" | "bilinear": scaler_function = core.resize.Bilinear
61-
case "neighbor": scaler_function = core.resize.Point
62-
case _: scaler_function = core.resize.Bicubic
63-
64-
# Map pixel format to equivalent vapoursynth pixel format (Needs expansion)
65-
if pixel_format is not None:
66-
match (pixel_format.lower()):
67-
case "yuv420p": pixel_format = vapoursynth.YUV420P8
68-
case "yuv420p10le": pixel_format = vapoursynth.YUV420P10
69-
case _: pixel_format = None
32+
# bestsource has the default behavior to store its index files in a user-specific directory
33+
# but for consistency, this setting makes it store the index file next to the video
34+
# as all the other source filters do
35+
video = core.bs.VideoSource(source, cachepath="/")
7036

71-
# Apply Scene Detection Processing
72-
if perform_scene_detection is not None:
73-
if downscale_height is not None or pixel_format is not None:
74-
# Ensure downscale_height is not greater than video height
75-
if downscale_height is not None:
76-
try:
77-
downscale_height = int(downscale_height)
78-
finally:
79-
downscale_height = min(downscale_height, video.height)
80-
# Maintain aspect ratio and ensure width is divisible by 2
81-
video = scaler_function(
82-
video,
83-
width=int(((video.width / video.height) * int(downscale_height)) // 2 * 2) if downscale_height is not None else video.width,
84-
height=int(downscale_height or video.height),
85-
format=pixel_format,
86-
)
87-
else:
88-
# Limit to one thread when encoding
37+
if perform_scene_detection is None:
38+
# Limit decoder resources when encoding since we will have multiple workers running
8939
core.num_threads = 1
40+
core.max_cache_size = 1024
9041

9142

9243
# Output video
93-
video.set_output()
44+
video.set_output()

av1an-core/src/scene_detect.rs

Lines changed: 5 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -241,16 +241,15 @@ fn build_decoder(
241241
clip_info.format_info.as_bit_depth()?
242242
};
243243

244-
let decoder = if input.is_vapoursynth() {
244+
let decoder = if input.is_vapoursynth() || input.is_vapoursynth_script() {
245245
// VapoursynthDecoder is the only reliable method for downscaling user-provided
246-
// scripts
246+
// scripts, and for our generated scripts, it is faster than piping.
247247

248248
// Must use from_file in order to set the CWD to the
249249
// directory of the user-provided VapourSynth script
250-
let mut vs_decoder = VapoursynthDecoder::from_file(
251-
input.as_vapoursynth_path(),
252-
input.as_vspipe_args_hashmap()?,
253-
)?;
250+
let mut args_map = input.as_vspipe_args_hashmap()?;
251+
args_map.insert("AV1AN_PERFORM_SCENE_DETECTION".into(), "1".into());
252+
let mut vs_decoder = VapoursynthDecoder::from_file(input.as_script_path(), args_map)?;
254253

255254
if sc_downscale_height.is_some() || sc_pix_format.is_some() {
256255
let downscale_height = sc_downscale_height.map(|dh| dh as u32);
@@ -286,38 +285,6 @@ fn build_decoder(
286285
}
287286

288287
Decoder::from_decoder_impl(DecoderImpl::Vapoursynth(vs_decoder))?
289-
} else if input.is_vapoursynth_script() {
290-
// User provides a video input but is using a Vapoursynth-based chunk method.
291-
// This may be slower than using ffmpeg but by using the same source filter,
292-
// we ensure consistency in decoding.
293-
let mut command = Command::new("vspipe");
294-
295-
if let Some(downscale_height) = sc_downscale_height {
296-
command.env("AV1AN_DOWNSCALE_HEIGHT", downscale_height.to_string());
297-
}
298-
if let Some(pixel_format) = sc_pix_format {
299-
command.env("AV1AN_PIXEL_FORMAT", format!("{pixel_format:?}"));
300-
}
301-
302-
command
303-
.arg("-c")
304-
.arg("y4m")
305-
.arg(input.as_script_path())
306-
.arg("-")
307-
.env("AV1AN_PERFORM_SCENE_DETECTION", "true")
308-
.env("AV1AN_SCALER", sc_scaler)
309-
.stdin(Stdio::null())
310-
.stdout(Stdio::piped())
311-
.stderr(Stdio::null());
312-
// Append vspipe python arguments to the environment if there are any
313-
for arg in input.as_vspipe_args_vec()? {
314-
command.args(["-a", &arg]);
315-
}
316-
317-
let y4m_decoder = Y4mDecoder::new(Box::new(
318-
command.spawn()?.stdout.expect("vspipe should have stdout"),
319-
) as Box<dyn Read>)?;
320-
Decoder::from_decoder_impl(DecoderImpl::Y4m(y4m_decoder))?
321288
} else {
322289
// FFmpeg is faster if the user provides video input
323290
let path = input.as_path();

av1an-core/src/vapoursynth.rs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -829,10 +829,6 @@ pub fn generate_loadscript_text(
829829
.replace(
830830
"chunk_method = os.environ.get(\"AV1AN_CHUNK_METHOD\", None)",
831831
&format!("chunk_method = {chunk_method_lower:?}"),
832-
)
833-
.replace(
834-
"cache_file = os.environ.get(\"AV1AN_CACHE_FILE\", None)",
835-
&format!("cache_file = {cache_file:?}"),
836832
);
837833

838834
if let Some(scene_detection_downscale_height) = scene_detection_downscale_height {

av1an/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,6 @@ ref_option_ref = "warn"
117117
# Disabled due to https://github.com/rust-lang/rust-clippy/issues/14697
118118
# self_named_module_files = "warn"
119119
semicolon_if_nothing_returned = "warn"
120-
string_to_string = "warn"
121120
tests_outside_test_module = "warn"
122121
transmute_ptr_to_ptr = "warn"
123122
unused_peekable = "warn"

0 commit comments

Comments
 (0)