jinensetpal
/
vision
connected to https://github.com/jinensetpal/vision.git


  
1

	
2

	
3

	
4

	
5

	
6

	
7

	
8

	
9

	
10

	
11

	
12

	
13

	
14

	
15

	
16

	
17

	
18

	
19

	
20

	
21

	
22

	
23

	
24

	
25

	
26

	
27

	
28

	
29

	
30

	
31

	
32

	
33

	
34

	
35

	
36

	
37

	
38

	
39

	
40

	
41

	
42

	
43

	
44

	
45

	
46

	
47

	
48

	
49

	
50

	
51

	
52

	
53

	
54

	
55

	
56

	
57

	
58

	
59

	
60

	
61

	
62

	
63

	
64

	
65

	
66

	
67

	
68

	
69

	
70

	
71

	
72

	
73

	
74

	
75

	
76

	
77

	
78

	
79

	
80

	
81

	
82

	
83

	
84

	
85

	
86

	
87

	
88

	
89

	
90

	
91

	
92

	
93

	
94

	
95

	
96

	
97

	
            import math
import os

import pytest
import torch
import torchvision
from torchvision.io import _HAS_GPU_VIDEO_DECODER, VideoReader

try:
    import av
except ImportError:
    av = None

VIDEO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "videos")


@pytest.mark.skipif(_HAS_GPU_VIDEO_DECODER is False, reason="Didn't compile with support for gpu decoder")
class TestVideoGPUDecoder:
    @pytest.mark.skipif(av is None, reason="PyAV unavailable")
    @pytest.mark.parametrize(
        "video_file",
        [
            "RATRACE_wave_f_nm_np1_fr_goo_37.avi",
            "TrumanShow_wave_f_nm_np1_fr_med_26.avi",
            "v_SoccerJuggling_g23_c01.avi",
            "v_SoccerJuggling_g24_c01.avi",
            "R6llTwEh07w.mp4",
            "SOX5yA1l24A.mp4",
            "WUzgd7C1pWA.mp4",
        ],
    )
    def test_frame_reading(self, video_file):
        torchvision.set_video_backend("cuda")
        full_path = os.path.join(VIDEO_DIR, video_file)
        decoder = VideoReader(full_path)
        with av.open(full_path) as container:
            for av_frame in container.decode(container.streams.video[0]):
                av_frames = torch.tensor(av_frame.to_rgb(src_colorspace="ITU709").to_ndarray())
                vision_frames = next(decoder)["data"]
                mean_delta = torch.mean(torch.abs(av_frames.float() - vision_frames.cpu().float()))
                assert mean_delta < 0.75

    @pytest.mark.skipif(av is None, reason="PyAV unavailable")
    @pytest.mark.parametrize("keyframes", [True, False])
    @pytest.mark.parametrize(
        "full_path, duration",
        [
            (os.path.join(VIDEO_DIR, x), y)
            for x, y in [
                ("v_SoccerJuggling_g23_c01.avi", 8.0),
                ("v_SoccerJuggling_g24_c01.avi", 8.0),
                ("R6llTwEh07w.mp4", 10.0),
                ("SOX5yA1l24A.mp4", 11.0),
                ("WUzgd7C1pWA.mp4", 11.0),
            ]
        ],
    )
    def test_seek_reading(self, keyframes, full_path, duration):
        torchvision.set_video_backend("cuda")
        decoder = VideoReader(full_path)
        time = duration / 2
        decoder.seek(time, keyframes_only=keyframes)
        with av.open(full_path) as container:
            container.seek(int(time * 1000000), any_frame=not keyframes, backward=False)
            for av_frame in container.decode(container.streams.video[0]):
                av_frames = torch.tensor(av_frame.to_rgb(src_colorspace="ITU709").to_ndarray())
                vision_frames = next(decoder)["data"]
                mean_delta = torch.mean(torch.abs(av_frames.float() - vision_frames.cpu().float()))
                assert mean_delta < 0.75

    @pytest.mark.skipif(av is None, reason="PyAV unavailable")
    @pytest.mark.parametrize(
        "video_file",
        [
            "RATRACE_wave_f_nm_np1_fr_goo_37.avi",
            "TrumanShow_wave_f_nm_np1_fr_med_26.avi",
            "v_SoccerJuggling_g23_c01.avi",
            "v_SoccerJuggling_g24_c01.avi",
            "R6llTwEh07w.mp4",
            "SOX5yA1l24A.mp4",
            "WUzgd7C1pWA.mp4",
        ],
    )
    def test_metadata(self, video_file):
        torchvision.set_video_backend("cuda")
        full_path = os.path.join(VIDEO_DIR, video_file)
        decoder = VideoReader(full_path)
        video_metadata = decoder.get_metadata()["video"]
        with av.open(full_path) as container:
            video = container.streams.video[0]
            av_duration = float(video.duration * video.time_base)
            assert math.isclose(video_metadata["duration"], av_duration, rel_tol=1e-2)
            assert math.isclose(video_metadata["fps"], video.base_rate, rel_tol=1e-2)


if __name__ == "__main__":
    pytest.main([__file__])