1
1
import collections
2
2
from common_utils import get_tmp_dir
3
3
from fractions import Fraction
4
- import logging
5
4
import math
6
5
import numpy as np
7
6
import os
106
105
torch .ops .load_library (lib_path )
107
106
video_reader = torch .ops .video_reader
108
107
109
- log = logging .getLogger (__name__ )
110
-
111
108
DecoderResult = collections .namedtuple (
112
109
"DecoderResult" , "vframes vframe_pts vtimebase aframes aframe_pts atimebase"
113
110
)
@@ -233,7 +230,6 @@ def _decode_frames_by_av_module(
233
230
[audio_frame .pts for audio_frame in audio_frames ], dtype = torch .int64
234
231
)
235
232
236
- log .error ("Decode by PyAv. Elapsed time: %2.4f\n " % elapsed_time )
237
233
238
234
return DecoderResult (
239
235
vframes = vframes ,
@@ -343,7 +339,6 @@ def compare_decoding_result(self, tv_result, ref_result, config=all_check_config
343
339
"By default, it is disabled"
344
340
)
345
341
def test_stress_test_read_video_from_file (self ):
346
- log .error ("\n \t ===========stress test: read_video_from_file" )
347
342
num_iter = 10000
348
343
# video related
349
344
width , height , min_dimension = 0 , 0 , 0
@@ -355,10 +350,7 @@ def test_stress_test_read_video_from_file(self):
355
350
audio_timebase_num , audio_timebase_den = 0 , 1
356
351
357
352
for i in range (num_iter ):
358
- if i % 100 == 0 :
359
- log .error ("%d / %d" % (i , num_iter ))
360
353
for test_video , config in test_videos .items ():
361
- log .error ("\n video: %s\n " % test_video )
362
354
full_path = os .path .join (VIDEO_DIR , test_video )
363
355
364
356
# pass 1: decode all frames using new decoder
@@ -384,7 +376,6 @@ def test_stress_test_read_video_from_file(self):
384
376
385
377
386
378
def test_read_video_from_file (self ):
387
- log .error ("\n \t ===========test_read_video_from_file" )
388
379
"""
389
380
Test the case when decoder starts with a video file to decode frames.
390
381
"""
@@ -398,7 +389,6 @@ def test_read_video_from_file(self):
398
389
audio_timebase_num , audio_timebase_den = 0 , 1
399
390
400
391
for test_video , config in test_videos .items ():
401
- log .error ("\n video: %s\n " % test_video )
402
392
full_path = os .path .join (VIDEO_DIR , test_video )
403
393
404
394
# pass 1: decode all frames using new decoder
@@ -422,9 +412,6 @@ def test_read_video_from_file(self):
422
412
audio_timebase_den ,
423
413
)
424
414
elapsed_time = time .perf_counter () - start_time
425
- log .error (
426
- "TorchVision decoder. Elapsed time: %2.4f\n " % elapsed_time
427
- )
428
415
# pass 2: decode all frames using av
429
416
pyav_result = _decode_frames_by_av_module (full_path )
430
417
# check results from TorchVision decoder
@@ -433,7 +420,6 @@ def test_read_video_from_file(self):
433
420
self .compare_decoding_result (tv_result , pyav_result , config )
434
421
435
422
def test_read_video_from_file_rescale_min_dimension (self ):
436
- log .error ("\n \t ===========test_read_video_from_file_rescale_min_dimension" )
437
423
"""
438
424
Test the case when decoder starts with a video file to decode frames, and
439
425
video min dimension between height and width is set.
@@ -448,7 +434,6 @@ def test_read_video_from_file_rescale_min_dimension(self):
448
434
audio_timebase_num , audio_timebase_den = 0 , 1
449
435
450
436
for test_video , config in test_videos .items ():
451
- log .error ("\n video: %s\n " % test_video )
452
437
full_path = os .path .join (VIDEO_DIR , test_video )
453
438
454
439
start_time = time .perf_counter ()
@@ -471,11 +456,9 @@ def test_read_video_from_file_rescale_min_dimension(self):
471
456
audio_timebase_den ,
472
457
)
473
458
elapsed_time = time .perf_counter () - start_time
474
- log .error ("TorchVision decoder. Elapsed time: %2.4f\n " % elapsed_time )
475
459
self .assertEqual (min_dimension , min (tv_result [0 ].size (1 ), tv_result [0 ].size (2 )))
476
460
477
461
def test_read_video_from_file_rescale_width (self ):
478
- log .error ("\n \t ===========test_read_video_from_file_rescale_width" )
479
462
"""
480
463
Test the case when decoder starts with a video file to decode frames, and
481
464
video width is set.
@@ -490,7 +473,6 @@ def test_read_video_from_file_rescale_width(self):
490
473
audio_timebase_num , audio_timebase_den = 0 , 1
491
474
492
475
for test_video , config in test_videos .items ():
493
- log .error ("\n video: %s\n " % test_video )
494
476
full_path = os .path .join (VIDEO_DIR , test_video )
495
477
496
478
start_time = time .perf_counter ()
@@ -513,11 +495,9 @@ def test_read_video_from_file_rescale_width(self):
513
495
audio_timebase_den ,
514
496
)
515
497
elapsed_time = time .perf_counter () - start_time
516
- log .error ("TorchVision decoder. Elapsed time: %2.4f\n " % elapsed_time )
517
498
self .assertEqual (tv_result [0 ].size (2 ), width )
518
499
519
500
def test_read_video_from_file_rescale_height (self ):
520
- log .error ("\n \t ===========test_read_video_from_file_rescale_height" )
521
501
"""
522
502
Test the case when decoder starts with a video file to decode frames, and
523
503
video height is set.
@@ -532,7 +512,6 @@ def test_read_video_from_file_rescale_height(self):
532
512
audio_timebase_num , audio_timebase_den = 0 , 1
533
513
534
514
for test_video , config in test_videos .items ():
535
- log .error ("\n video: %s\n " % test_video )
536
515
full_path = os .path .join (VIDEO_DIR , test_video )
537
516
538
517
start_time = time .perf_counter ()
@@ -555,11 +534,9 @@ def test_read_video_from_file_rescale_height(self):
555
534
audio_timebase_den ,
556
535
)
557
536
elapsed_time = time .perf_counter () - start_time
558
- log .error ("TorchVision decoder. Elapsed time: %2.4f\n " % elapsed_time )
559
537
self .assertEqual (tv_result [0 ].size (1 ), height )
560
538
561
539
def test_read_video_from_file_rescale_width_and_height (self ):
562
- log .error ("\n \t ===========test_read_video_from_file_rescale_width_and_height" )
563
540
"""
564
541
Test the case when decoder starts with a video file to decode frames, and
565
542
both video height and width are set.
@@ -574,7 +551,6 @@ def test_read_video_from_file_rescale_width_and_height(self):
574
551
audio_timebase_num , audio_timebase_den = 0 , 1
575
552
576
553
for test_video , config in test_videos .items ():
577
- log .error ("\n video: %s\n " % test_video )
578
554
full_path = os .path .join (VIDEO_DIR , test_video )
579
555
580
556
start_time = time .perf_counter ()
@@ -597,13 +573,11 @@ def test_read_video_from_file_rescale_width_and_height(self):
597
573
audio_timebase_den ,
598
574
)
599
575
elapsed_time = time .perf_counter () - start_time
600
- log .error ("TorchVision decoder. Elapsed time: %2.4f\n " % elapsed_time )
601
576
self .assertEqual (tv_result [0 ].size (1 ), height )
602
577
self .assertEqual (tv_result [0 ].size (2 ), width )
603
578
604
579
605
580
def test_read_video_from_file_audio_resampling (self ):
606
- log .error ("\n \t ===========test_read_video_from_file_audio_resampling" )
607
581
"""
608
582
Test the case when decoder starts with a video file to decode frames, and
609
583
audio waveform are resampled
@@ -623,7 +597,6 @@ def test_read_video_from_file_audio_resampling(self):
623
597
audio_timebase_num , audio_timebase_den = 0 , 1
624
598
625
599
for test_video , config in test_videos .items ():
626
- log .error ("\n video: %s\n " % test_video )
627
600
full_path = os .path .join (VIDEO_DIR , test_video )
628
601
629
602
tv_result = video_reader .read_video_from_file (
@@ -659,7 +632,6 @@ def test_read_video_from_file_audio_resampling(self):
659
632
)
660
633
661
634
def test_compare_read_video_from_memory_and_file (self ):
662
- log .error ("\n \t ===========test_compare_read_video_from_memory_and_file" )
663
635
"""
664
636
Test the case when video is already in memory, and decoder reads data in memory
665
637
"""
@@ -673,7 +645,6 @@ def test_compare_read_video_from_memory_and_file(self):
673
645
audio_timebase_num , audio_timebase_den = 0 , 1
674
646
675
647
for test_video , config in test_videos .items ():
676
- log .error ("\n video: %s\n " % test_video )
677
648
full_path , video_tensor = _get_video_tensor (VIDEO_DIR , test_video )
678
649
679
650
# pass 1: decode all frames using cpp decoder
@@ -697,9 +668,6 @@ def test_compare_read_video_from_memory_and_file(self):
697
668
audio_timebase_den ,
698
669
)
699
670
elapsed_time = time .perf_counter () - start_time
700
- log .error (
701
- "TorchVision decoder. Elapsed time: %2.4f\n " % elapsed_time
702
- )
703
671
self .check_separate_decoding_result (tv_result_memory , config )
704
672
# pass 2: decode all frames from file
705
673
start_time = time .perf_counter ()
@@ -722,15 +690,13 @@ def test_compare_read_video_from_memory_and_file(self):
722
690
audio_timebase_den ,
723
691
)
724
692
elapsed_time = time .perf_counter () - start_time
725
- log .error ("TorchVision decoder. Elapsed time: %2.4f\n " % elapsed_time )
726
693
727
694
self .check_separate_decoding_result (tv_result_file , config )
728
695
# finally, compare results decoded from memory and file
729
696
self .compare_decoding_result (tv_result_memory , tv_result_file )
730
697
731
698
732
699
def test_read_video_from_file (self ):
733
- log .error ("\n \t ===========test_read_video_from_file" )
734
700
"""
735
701
Test the case when decoder starts with a video file to decode frames.
736
702
"""
@@ -744,7 +710,6 @@ def test_read_video_from_file(self):
744
710
audio_timebase_num , audio_timebase_den = 0 , 1
745
711
746
712
for test_video , config in test_videos .items ():
747
- log .error ("\n video: %s\n " % test_video )
748
713
full_path = os .path .join (VIDEO_DIR , test_video )
749
714
750
715
# pass 1: decode all frames using new decoder
@@ -768,7 +733,6 @@ def test_read_video_from_file(self):
768
733
audio_timebase_den ,
769
734
)
770
735
elapsed_time = time .perf_counter () - start_time
771
- log .error ("TorchVision decoder. Elapsed time: %2.4f\n " % elapsed_time )
772
736
# pass 2: decode all frames using av
773
737
pyav_result = _decode_frames_by_av_module (full_path )
774
738
@@ -777,7 +741,6 @@ def test_read_video_from_file(self):
777
741
self .compare_decoding_result (tv_result , pyav_result , config )
778
742
779
743
def test_read_video_from_memory (self ):
780
- log .error ("\n \t ===========test_read_video_from_memory" )
781
744
"""
782
745
Test the case when video is already in memory, and decoder reads data in memory
783
746
"""
@@ -791,7 +754,6 @@ def test_read_video_from_memory(self):
791
754
audio_timebase_num , audio_timebase_den = 0 , 1
792
755
793
756
for test_video , config in test_videos .items ():
794
- log .error ("\n video: %s\n " % test_video )
795
757
full_path , video_tensor = _get_video_tensor (VIDEO_DIR , test_video )
796
758
797
759
# pass 1: decode all frames using cpp decoder
@@ -815,15 +777,13 @@ def test_read_video_from_memory(self):
815
777
audio_timebase_den ,
816
778
)
817
779
elapsed_time = time .perf_counter () - start_time
818
- log .error ("TorchVision decoder. Elapsed time: %2.4f\n " % elapsed_time )
819
780
# pass 2: decode all frames using av
820
781
pyav_result = _decode_frames_by_av_module (full_path )
821
782
822
783
self .check_separate_decoding_result (tv_result , config )
823
784
self .compare_decoding_result (tv_result , pyav_result , config )
824
785
825
786
def test_read_video_from_memory_get_pts_only (self ):
826
- log .error ("\n \t ===========test_read_video_from_memory_get_pts_only" )
827
787
"""
828
788
Test the case when video is already in memory, and decoder reads data in memory.
829
789
Compare frame pts between decoding for pts only and full decoding
@@ -839,7 +799,6 @@ def test_read_video_from_memory_get_pts_only(self):
839
799
audio_timebase_num , audio_timebase_den = 0 , 1
840
800
841
801
for test_video , config in test_videos .items ():
842
- log .error ("\n video: %s\n " % test_video )
843
802
full_path , video_tensor = _get_video_tensor (VIDEO_DIR , test_video )
844
803
845
804
# pass 1: decode all frames using cpp decoder
@@ -863,7 +822,6 @@ def test_read_video_from_memory_get_pts_only(self):
863
822
audio_timebase_den ,
864
823
)
865
824
elapsed_time = time .perf_counter () - start_time
866
- log .error ("getPtsOnly = 0. Elapsed time: %2.4f" % elapsed_time )
867
825
self .assertAlmostEqual (config .video_fps , tv_result [3 ].item (), delta = 0.01 )
868
826
869
827
# pass 2: decode all frames to get PTS only using cpp decoder
@@ -887,21 +845,18 @@ def test_read_video_from_memory_get_pts_only(self):
887
845
audio_timebase_den ,
888
846
)
889
847
elapsed_time = time .perf_counter () - start_time
890
- log .error ("getPtsOnly = 1. Elapsed time: %2.4f\n " % elapsed_time )
891
848
892
849
self .assertEqual (tv_result_pts_only [0 ].numel (), 0 )
893
850
self .assertEqual (tv_result_pts_only [4 ].numel (), 0 )
894
851
self .compare_decoding_result (tv_result , tv_result_pts_only )
895
852
896
853
def test_read_video_in_range_from_memory (self ):
897
- log .error ("\n \t test_read_video_in_range_from_memory" )
898
854
"""
899
855
Test the case when video is already in memory, and decoder reads data in memory.
900
856
In addition, decoder takes meaningful start- and end PTS as input, and decode
901
857
frames within that interval
902
858
"""
903
859
for test_video , config in test_videos .items ():
904
- log .error ("\n video: %s\n " % test_video )
905
860
full_path , video_tensor = _get_video_tensor (VIDEO_DIR , test_video )
906
861
# video related
907
862
width , height , min_dimension = 0 , 0 , 0
@@ -935,16 +890,11 @@ def test_read_video_in_range_from_memory(self):
935
890
tv_result
936
891
)
937
892
elapsed_time = time .perf_counter () - start_time
938
- log .error (
939
- "Decode whole video by TorchVision decoder. Elapsed time: %2.4f" % elapsed_time
940
- )
941
893
self .assertAlmostEqual (config .video_fps , vfps .item (), delta = 0.01 )
942
894
943
895
for num_frames in [4 , 8 , 16 , 32 , 64 , 128 ]:
944
- log .error ("\n video: %s No. of frames: %d" % (test_video , num_frames ))
945
896
start_pts_ind_max = vframe_pts .size (0 ) - num_frames
946
897
if start_pts_ind_max <= 0 :
947
- log .error ("Skip." )
948
898
continue
949
899
# randomly pick start pts
950
900
start_pts_ind = randint (0 , start_pts_ind_max )
@@ -990,9 +940,6 @@ def test_read_video_in_range_from_memory(self):
990
940
audio_timebase_den ,
991
941
)
992
942
elapsed_time = time .perf_counter () - start_time
993
- log .error (
994
- "TorchVision decoder. Elapsed time: %2.4f" % elapsed_time
995
- )
996
943
997
944
# pass 3: decode frames in range using PyAv
998
945
video_timebase_av , audio_timebase_av = _get_timebase_by_av_module (full_path )
0 commit comments