Skip to content

class FVDScore

Source code in aigve/metrics/video_quality_assessment/distribution_based/
class FVDScore:
    """Compute the FVD score between two sets of videos from I3D features.

    Features are taken from one layer of a pre-trained I3D model; the score
    is the Frechet distance between the Gaussian statistics (mean and
    covariance) of the two feature sets.
    """

    def __init__(self, model_path, feature_layer=-2):
        """Initialize the FVDScore evaluator.

        Args:
            model_path (str): Path to the pre-trained I3D model.
            feature_layer (int or str): The layer of the I3D model to use for
                feature extraction (index, or layer name).
        """
        self.i3d_model = self.load_i3d_model(model_path, feature_layer)

    @staticmethod
    def load_i3d_model(model_path, feature_layer):
        """Load a pre-trained I3D model for feature extraction.

        Args:
            model_path (str): Path to the pre-trained I3D model.
            feature_layer (int or str): The layer of the I3D model to use for
                feature extraction. An int indexes ``model.layers``; a str is
                looked up by layer name.

        Returns:
            Model: The I3D model for feature extraction.
        """
        i3d_model = tf.keras.models.load_model(model_path)
        # ``layers`` is a plain list, so a layer name must be resolved through
        # ``get_layer`` rather than by indexing.
        if isinstance(feature_layer, str):
            layer = i3d_model.get_layer(name=feature_layer)
        else:
            layer = i3d_model.layers[feature_layer]
        feature_model = Model(inputs=i3d_model.input, outputs=layer.output)
        return feature_model

    def preprocess_videos(self, videos):
        """Preprocess videos for the I3D model.

        Delegates to ``preprocess_input``, which must be in scope at module
        level (its exact transform is not defined in this class).

        Args:
            videos (numpy array): Input videos as a numpy array of shape
                (num_videos, num_frames, height, width, channels).

        Returns:
            numpy array: Preprocessed videos.
        """
        return preprocess_input(videos)

    def calculate_statistics(self, videos):
        """Calculate the feature statistics (mean and covariance) for a set of videos.

        Args:
            videos (numpy array): Preprocessed videos.

        Returns:
            tuple: Mean and covariance of the features.
        """
        features = self.i3d_model.predict(videos)
        mu = features.mean(axis=0)
        # rowvar=False: each row is one video, each column one feature.
        # NOTE(review): assumes the chosen layer yields 2-D
        # (num_videos, num_features) output -- confirm for the model in use.
        sigma = np.cov(features, rowvar=False)
        return mu, sigma

    def calculate_fvd(self, videos1, videos2):
        """Calculate the FVD score between two sets of videos.

        The score is ``||mu1 - mu2||^2 + Tr(sigma1 + sigma2 - 2 * sqrt(sigma1 @ sigma2))``.

        Args:
            videos1 (numpy array): First set of videos of shape
                (num_videos, num_frames, height, width, channels).
            videos2 (numpy array): Second set of videos of shape
                (num_videos, num_frames, height, width, channels).

        Returns:
            float: The FVD score.
        """
        # Preprocess videos
        videos1 = self.preprocess_videos(videos1)
        videos2 = self.preprocess_videos(videos2)

        # Calculate statistics
        mu1, sigma1 = self.calculate_statistics(videos1)
        mu2, sigma2 = self.calculate_statistics(videos2)

        # Compute FVD score
        ssdiff = np.sum((mu1 - mu2) ** 2.0)
        covmean = sqrtm(sigma1.dot(sigma2))

        # The matrix square root can come back complex; keep the real part.
        if np.iscomplexobj(covmean):
            covmean = covmean.real

        fvd = ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)
        return fvd

__init__(model_path, feature_layer=-2)

Initialize the FVDScore evaluator.

Parameters:
- model_path (str): Path to the pre-trained I3D model.
- feature_layer (int or str): The layer of the I3D model to use for feature extraction.

Source code in aigve/metrics/video_quality_assessment/distribution_based/
def __init__(self, model_path, feature_layer=-2):
    """Initialize the FVDScore evaluator.

    Args:
        model_path (str): Path to the pre-trained I3D model.
        feature_layer (int or str): The layer of the I3D model to use for feature extraction.
    """
    # Build the feature extractor once and keep it on the instance.
    self.i3d_model = self.load_i3d_model(model_path, feature_layer)

calculate_fvd(videos1, videos2)

Calculate the FVD score between two sets of videos.

Parameters:
- videos1 (numpy array): First set of videos of shape (num_videos, num_frames, height, width, channels).
- videos2 (numpy array): Second set of videos of shape (num_videos, num_frames, height, width, channels).

Returns: float: The FVD score.

Source code in aigve/metrics/video_quality_assessment/distribution_based/
def calculate_fvd(self, videos1, videos2):
    """Calculate the FVD score between two sets of videos.

    The score is ``||mu1 - mu2||^2 + Tr(sigma1 + sigma2 - 2 * sqrt(sigma1 @ sigma2))``.

    Args:
        videos1 (numpy array): First set of videos of shape (num_videos, num_frames, height, width, channels).
        videos2 (numpy array): Second set of videos of shape (num_videos, num_frames, height, width, channels).

    Returns:
        float: The FVD score.
    """
    # Preprocess videos
    videos1 = self.preprocess_videos(videos1)
    videos2 = self.preprocess_videos(videos2)

    # Calculate statistics
    mu1, sigma1 = self.calculate_statistics(videos1)
    mu2, sigma2 = self.calculate_statistics(videos2)

    # Compute FVD score
    ssdiff = np.sum((mu1 - mu2) ** 2.0)
    covmean = sqrtm(sigma1.dot(sigma2))

    # The matrix square root can come back complex; keep the real part.
    if np.iscomplexobj(covmean):
        covmean = covmean.real

    fvd = ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fvd


calculate_statistics(videos)

Calculate the feature statistics (mean and covariance) for a set of videos.

Parameters: videos (numpy array): Preprocessed videos.

Returns: tuple: Mean and covariance of the features.

Source code in aigve/metrics/video_quality_assessment/distribution_based/
def calculate_statistics(self, videos):
    """Calculate the feature statistics (mean and covariance) for a set of videos.

    Args:
        videos (numpy array): Preprocessed videos.

    Returns:
        tuple: Mean and covariance of the features.
    """
    features = self.i3d_model.predict(videos)
    mu = features.mean(axis=0)
    # rowvar=False: each row is one video, each column one feature.
    # NOTE(review): assumes the chosen layer yields 2-D
    # (num_videos, num_features) output -- confirm for the model in use.
    sigma = np.cov(features, rowvar=False)
    return mu, sigma

load_i3d_model(model_path, feature_layer) staticmethod

Load a pre-trained I3D model for feature extraction.

Parameters:
- model_path (str): Path to the pre-trained I3D model.
- feature_layer (int or str): The layer of the I3D model to use for feature extraction.

Returns: Model: The I3D model for feature extraction.

Source code in aigve/metrics/video_quality_assessment/distribution_based/
@staticmethod
def load_i3d_model(model_path, feature_layer):
    """Load a pre-trained I3D model for feature extraction.

    Args:
        model_path (str): Path to the pre-trained I3D model.
        feature_layer (int or str): The layer of the I3D model to use for
            feature extraction. An int indexes ``model.layers``; a str is
            looked up by layer name.

    Returns:
        Model: The I3D model for feature extraction.
    """
    i3d_model = tf.keras.models.load_model(model_path)
    # ``layers`` is a plain list, so a layer name must be resolved through
    # ``get_layer`` rather than by indexing.
    if isinstance(feature_layer, str):
        layer = i3d_model.get_layer(name=feature_layer)
    else:
        layer = i3d_model.layers[feature_layer]
    feature_model = Model(inputs=i3d_model.input, outputs=layer.output)
    return feature_model


preprocess_videos(videos)

Preprocess videos for the I3D model.

Parameters: videos (numpy array): Input videos as a numpy array of shape (num_videos, num_frames, height, width, channels).

Returns: numpy array: Preprocessed videos.

Source code in aigve/metrics/video_quality_assessment/distribution_based/
def preprocess_videos(self, videos):
    """Preprocess videos for the I3D model.

    Delegates to ``preprocess_input``, which must be in scope at module
    level (its exact transform is not defined in this listing).

    Args:
        videos (numpy array): Input videos as a numpy array of shape (num_videos, num_frames, height, width, channels).

    Returns:
        numpy array: Preprocessed videos.
    """
    return preprocess_input(videos)