Skip to content

Image Quality Metrics

LPIPSMetric

Bases: BaseImageQualityMetric

LPIPS Metric to compute the Learned Perceptual Image Patch Similarity (LPIPS) score between two images. LPIPS essentially computes the similarity between the activations of two image patches for some pre-defined network. This measure has been shown to match human perception well. A low LPIPS score means that the image patches are perceptually similar.

Parameters:

Name Type Description Default
lpips_net_type str

The network type to use for computing LPIPS. One of "alex", "vgg", or "squeeze".

'alex'
image_size Tuple[int, int]

The size to which images will be resized before computing LPIPS.

(512, 512)
name str

The name of the metric.

'alexnet_learned_perceptual_image_patch_similarity'
Source code in hemm/metrics/image_quality/lpips.py
class LPIPSMetric(BaseImageQualityMetric):
    """LPIPS Metric to compute the Learned Perceptual Image Patch Similarity (LPIPS) score
    between two images. LPIPS essentially computes the similarity between the activations of
    two image patches for some pre-defined network. This measure has been shown to match
    human perception well. A low LPIPS score means that the image patches are perceptually
    similar.

    Args:
        lpips_net_type (str): The network type to use for computing LPIPS. One of "alex", "vgg",
            or "squeeze".
        image_size (Tuple[int, int]): The size to which images will be resized before computing
            LPIPS.
        name (str): The name of the metric.
    """

    def __init__(
        self,
        lpips_net_type: Literal["alex", "vgg", "squeeze"] = "alex",
        image_size: Optional[Tuple[int, int]] = (512, 512),
        name: str = "alexnet_learned_perceptual_image_patch_similarity",
    ) -> None:
        super().__init__(name)
        self.image_size = image_size
        # Bind the backbone once so `compute_metric` only has to supply the images.
        self.lpips_metric = partial(
            learned_perceptual_image_patch_similarity, net_type=lpips_net_type
        )
        # `image_size` changes the resulting score, so it is recorded next to the
        # network type.
        self.config = {"lpips_net_type": lpips_net_type, "image_size": image_size}

    def _to_lpips_tensor(self, pil_image):
        """Turn a PIL image into a batched, channel-first float tensor in [-1, 1].

        Args:
            pil_image: The PIL image to convert.

        Returns:
            A float tensor with a leading batch axis of size 1, channels second,
            and pixel values rescaled from [0, 255] to [-1, 1].
        """
        # LPIPS backbones take 3-channel inputs; converting to RGB keeps RGBA,
        # palette and grayscale images from failing further down.
        resized = pil_image.convert("RGB").resize(self.image_size)
        pixels = np.expand_dims(np.array(resized), axis=0).astype(np.uint8)
        # The array is (batch, height, width, channel). Only the channel axis is
        # moved; the spatial axes keep their order so the image is not transposed.
        tensor = torch.from_numpy(pixels).permute(0, 3, 1, 2).float()
        return (tensor / 127.5) - 1.0

    @weave.op()
    def compute_metric(
        self, ground_truth_pil_image: Image, generated_pil_image: Image, prompt: str
    ) -> ComputeMetricOutput:
        """Compute the LPIPS score between the generated and ground truth images.

        Args:
            ground_truth_pil_image: Ground truth image in PIL format.
            generated_pil_image: Generated image in PIL format.
            prompt (str): Prompt for the image generation. Not used by this metric.

        Returns:
            ComputeMetricOutput: The LPIPS score and the base64-encoded ground
                truth image.
        """
        ground_truth_image = self._to_lpips_tensor(ground_truth_pil_image)
        generated_image = self._to_lpips_tensor(generated_pil_image)
        return ComputeMetricOutput(
            score=float(
                self.lpips_metric(generated_image, ground_truth_image).detach()
            ),
            ground_truth_image=base64_encode_image(ground_truth_pil_image),
        )

    @weave.op()
    async def __call__(
        self, prompt: str, ground_truth_image: str, model_output: Dict[str, Any]
    ) -> Union[float, Dict[str, float]]:
        """Score one example by delegating to the base class implementation.

        Args:
            prompt (str): Prompt for the image generation.
            ground_truth_image (str): Ground truth image in base64 format.
            model_output (Dict[str, Any]): Model output containing the generated image.

        Returns:
            Union[float, Dict[str, float]]: Whatever the base class `__call__` returns.
        """
        # NOTE(review): unused marker; presumably it keeps this op's source distinct
        # from the identical wrappers on sibling metrics - confirm before removing.
        _ = "LPIPSMetric"
        return super().__call__(prompt, ground_truth_image, model_output)

PSNRMetric

Bases: BaseImageQualityMetric

PSNR Metric to compute the Peak Signal-to-Noise Ratio (PSNR) between two images.

Parameters:

Name Type Description Default
psnr_data_range Optional[Union[float, Tuple[float, float]]]

The data range of the input image (min, max). If None, the data range is determined from the image data type.

None
psnr_base float

The base of the logarithm in the PSNR formula.

10.0
image_size Tuple[int, int]

The size to which images will be resized before computing PSNR.

(512, 512)
name str

The name of the metric.

'peak_signal_noise_ratio'
Source code in hemm/metrics/image_quality/psnr.py
class PSNRMetric(BaseImageQualityMetric):
    """Metric that scores a generated image against its ground truth using the
    Peak Signal-to-Noise Ratio (PSNR).

    Args:
        psnr_data_range (Optional[Union[float, Tuple[float, float]]]): The data range of the
            input image (min, max), forwarded as `data_range`. If None, the underlying
            implementation determines the range itself.
        psnr_base (float): The base of the logarithm in the PSNR formula.
        image_size (Tuple[int, int]): The size to which images will be resized before computing
            PSNR.
        name (str): The name of the metric.
    """

    def __init__(
        self,
        psnr_data_range: Optional[Union[float, Tuple[float, float]]] = None,
        psnr_base: float = 10.0,
        image_size: Optional[Tuple[int, int]] = (512, 512),
        name: str = "peak_signal_noise_ratio",
    ) -> None:
        super().__init__(name)
        self.image_size = image_size
        # Freeze the PSNR hyper-parameters; only the two tensors vary per call.
        psnr_kwargs = {"data_range": psnr_data_range, "base": psnr_base}
        self.psnr_metric = partial(peak_signal_noise_ratio, **psnr_kwargs)
        self.config = {
            "psnr_base": psnr_base,
            "psnr_data_range": psnr_data_range,
            "image_size": image_size,
        }

    @weave.op()
    def compute_metric(
        self,
        ground_truth_pil_image: Image.Image,
        generated_pil_image: Image.Image,
        prompt: str,
    ) -> ComputeMetricOutput:
        """Compute the PSNR between the generated and ground truth images.

        Args:
            ground_truth_pil_image (Image.Image): Ground truth image in PIL format.
            generated_pil_image (Image.Image): Generated image in PIL format.
            prompt (str): Prompt for the image generation. Not used by this metric.

        Returns:
            ComputeMetricOutput: The PSNR score and the base64-encoded ground
                truth image.
        """
        # Both images go through the same steps: resize, add a leading batch
        # axis, cast to uint8, then to a float tensor. No axes are reordered.
        reference, candidate = (
            torch.from_numpy(
                np.expand_dims(
                    np.array(pil_image.resize(self.image_size)), axis=0
                ).astype(np.uint8)
            ).float()
            for pil_image in (ground_truth_pil_image, generated_pil_image)
        )
        psnr_value = self.psnr_metric(candidate, reference)
        encoded_ground_truth = base64_encode_image(ground_truth_pil_image)
        return ComputeMetricOutput(
            score=float(psnr_value.detach()),
            ground_truth_image=encoded_ground_truth,
        )

    @weave.op()
    async def __call__(
        self, prompt: str, ground_truth_image: str, model_output: Dict[str, Any]
    ) -> Union[float, Dict[str, float]]:
        """Score one example by delegating to the base class implementation.

        Args:
            prompt (str): Prompt for the image generation.
            ground_truth_image (str): Ground truth image in base64 format.
            model_output (Dict[str, Any]): Model output containing the generated image.

        Returns:
            Union[float, Dict[str, float]]: Whatever the base class `__call__` returns.
        """
        # NOTE(review): unused marker; presumably it keeps this op's source distinct
        # from the identical wrappers on sibling metrics - confirm before removing.
        _ = "PSNRMetric"
        return super().__call__(prompt, ground_truth_image, model_output)

SSIMMetric

Bases: BaseImageQualityMetric

SSIM Metric to compute the Structural Similarity Index Measure (SSIM) between two images.

Parameters:

Name Type Description Default
ssim_gaussian_kernel bool

Whether to use a Gaussian kernel for SSIM computation.

True
ssim_sigma float

The standard deviation of the Gaussian kernel.

1.5
ssim_kernel_size int

The size of the Gaussian kernel.

11
ssim_data_range Optional[Union[float, Tuple[float, float]]]

The data range of the input image (min, max). If None, the data range is determined from the image data type.

None
ssim_k1 float

The constant used to stabilize the SSIM numerator.

0.01
ssim_k2 float

The constant used to stabilize the SSIM denominator.

0.03
image_size Tuple[int, int]

The size to which images will be resized before computing SSIM.

(512, 512)
name str

The name of the metric.

'structural_similarity_index_measure'
Source code in hemm/metrics/image_quality/ssim.py
class SSIMMetric(BaseImageQualityMetric):
    """SSIM Metric to compute the
    [Structural Similarity Index Measure (SSIM)](https://en.wikipedia.org/wiki/Structural_similarity)
    between two images.

    Args:
        ssim_gaussian_kernel (bool): Whether to use a Gaussian kernel for SSIM computation.
        ssim_sigma (float): The standard deviation of the Gaussian kernel.
        ssim_kernel_size (int): The size of the Gaussian kernel.
        ssim_data_range (Optional[Union[float, Tuple[float, float]]]): The data range of the input
            image (min, max), forwarded as `data_range`. If None, the underlying implementation
            determines the range itself.
        ssim_k1 (float): Stabilizing constant forwarded to the SSIM computation as `k1`.
        ssim_k2 (float): Stabilizing constant forwarded to the SSIM computation as `k2`.
        image_size (Tuple[int, int]): The size to which images will be resized before computing
            SSIM.
        name (str): The name of the metric.
    """

    def __init__(
        self,
        ssim_gaussian_kernel: bool = True,
        ssim_sigma: float = 1.5,
        ssim_kernel_size: int = 11,
        ssim_data_range: Union[float, Tuple[float, float], None] = None,
        ssim_k1: float = 0.01,
        ssim_k2: float = 0.03,
        image_size: Optional[Tuple[int, int]] = (512, 512),
        name: str = "structural_similarity_index_measure",
    ) -> None:
        super().__init__(name)
        self.image_size = image_size
        # Bind every SSIM hyper-parameter once; only the two tensors vary per call.
        self.ssim_metric = partial(
            structural_similarity_index_measure,
            gaussian_kernel=ssim_gaussian_kernel,
            sigma=ssim_sigma,
            kernel_size=ssim_kernel_size,
            data_range=ssim_data_range,
            k1=ssim_k1,
            k2=ssim_k2,
        )
        # `image_size` changes the resulting score, so it is recorded together
        # with the other hyper-parameters (as PSNRMetric already does).
        self.config = {
            "ssim_gaussian_kernel": ssim_gaussian_kernel,
            "ssim_sigma": ssim_sigma,
            "ssim_kernel_size": ssim_kernel_size,
            "ssim_data_range": ssim_data_range,
            "ssim_k1": ssim_k1,
            "ssim_k2": ssim_k2,
            "image_size": image_size,
        }

    @weave.op()
    def compute_metric(
        self,
        ground_truth_pil_image: Image.Image,
        generated_pil_image: Image.Image,
        prompt: str,
    ) -> ComputeMetricOutput:
        """Compute the SSIM between the generated and ground truth images.

        Args:
            ground_truth_pil_image (Image.Image): Ground truth image in PIL format.
            generated_pil_image (Image.Image): Generated image in PIL format.
            prompt (str): Prompt for the image generation. Not used by this metric.

        Returns:
            ComputeMetricOutput: The SSIM score and the base64-encoded ground
                truth image.
        """
        # Resize, add a leading batch axis, then move the last (channel) axis to
        # position 1 so the tensor is channel-first.
        ground_truth_image = (
            torch.from_numpy(
                np.expand_dims(
                    np.array(ground_truth_pil_image.resize(self.image_size)), axis=0
                ).astype(np.uint8)
            )
            .permute(0, 3, 1, 2)
            .float()
        )
        generated_image = (
            torch.from_numpy(
                np.expand_dims(
                    np.array(generated_pil_image.resize(self.image_size)), axis=0
                ).astype(np.uint8)
            )
            .permute(0, 3, 1, 2)
            .float()
        )
        return ComputeMetricOutput(
            score=float(self.ssim_metric(generated_image, ground_truth_image)),
            ground_truth_image=base64_encode_image(ground_truth_pil_image),
        )

    @weave.op()
    async def __call__(
        self, prompt: str, ground_truth_image: str, model_output: Dict[str, Any]
    ) -> Union[float, Dict[str, float]]:
        """Score one example by delegating to the base class implementation.

        Args:
            prompt (str): Prompt for the image generation.
            ground_truth_image (str): Ground truth image in base64 format.
            model_output (Dict[str, Any]): Model output containing the generated image.

        Returns:
            Union[float, Dict[str, float]]: Whatever the base class `__call__` returns.
        """
        # NOTE(review): unused marker; presumably it keeps this op's source distinct
        # from the identical wrappers on sibling metrics - confirm before removing.
        _ = "SSIMMetric"
        return super().__call__(prompt, ground_truth_image, model_output)

BaseImageQualityMetric

Bases: ABC

Source code in hemm/metrics/image_quality/base.py
class BaseImageQualityMetric(ABC):
    """Common scaffolding for metrics that compare a generated image with a
    ground truth image."""

    def __init__(self, name: str) -> None:
        """Set up the state shared by every image quality metric.

        Args:
            name (str): Name of the metric; used as the key of the dict returned
                by `__call__`.
        """
        super().__init__()
        self.scores = []  # one entry is appended per `__call__`
        self.name = name
        self.config = {}  # subclasses overwrite this with their hyper-parameters

    @abstractmethod
    def compute_metric(
        self,
        ground_truth_pil_image: Image.Image,
        generated_pil_image: Image.Image,
        prompt: str,
    ) -> ComputeMetricOutput:
        """Score a generated image against its ground truth. Abstract: every
        concrete metric must provide its own implementation.

        Args:
            ground_truth_pil_image (Image.Image): Ground truth image in PIL format.
            generated_pil_image (Image.Image): Generated image in PIL format.
            prompt (str): Prompt for the image generation.

        Returns:
            ComputeMetricOutput: Output containing the metric score and ground truth image.
        """

    def __call__(
        self, prompt: str, ground_truth_image: str, model_output: Dict[str, Any]
    ) -> Union[float, Dict[str, float]]:
        """Decode both images, run `compute_metric` and record the score. This
        method is used as the scorer function for `weave.Evaluation` in the
        evaluation pipelines.

        Args:
            prompt (str): Prompt for the image generation.
            ground_truth_image (str): Ground truth image in base64 format.
            model_output (Dict[str, Any]): Model output containing the generated
                image under the "image" key.

        Returns:
            Union[float, Dict[str, float]]: A dict mapping the metric name to the score.
        """

        def _decode(encoded: str) -> Image.Image:
            # Keep only what follows the last ";base64," marker, if one is present.
            payload = encoded.split(";base64,")[-1]
            return Image.open(BytesIO(base64.b64decode(payload)))

        reference_image = _decode(ground_truth_image)
        candidate_image = _decode(model_output["image"])
        outcome = self.compute_metric(reference_image, candidate_image, prompt)
        score = outcome.score
        self.scores.append(score)
        return {self.name: score}

__call__(prompt, ground_truth_image, model_output)

Compute the metric for the given images. This method is used as the scorer function for weave.Evaluation in the evaluation pipelines.

Parameters:

Name Type Description Default
prompt str

Prompt for the image generation.

required
ground_truth_image str

Ground truth image in base64 format.

required
model_output Dict[str, Any]

Model output containing the generated image.

required

Returns:

Type Description
Union[float, Dict[str, float]]

Union[float, Dict[str, float]]: Metric score.

Source code in hemm/metrics/image_quality/base.py
def __call__(
    self, prompt: str, ground_truth_image: str, model_output: Dict[str, Any]
) -> Union[float, Dict[str, float]]:
    """Decode both images, run `compute_metric` and record the score. This method
    is used as the scorer function for `weave.Evaluation` in the evaluation pipelines.

    Args:
        prompt (str): Prompt for the image generation.
        ground_truth_image (str): Ground truth image in base64 format.
        model_output (Dict[str, Any]): Model output containing the generated image
            under the "image" key.

    Returns:
        Union[float, Dict[str, float]]: A dict mapping the metric name to the score.
    """
    marker = ";base64,"
    # Everything after the last marker is the payload; a string without the
    # marker is decoded whole.
    reference_bytes = base64.b64decode(ground_truth_image.split(marker)[-1])
    reference_image = Image.open(BytesIO(reference_bytes))
    candidate_bytes = base64.b64decode(model_output["image"].split(marker)[-1])
    candidate_image = Image.open(BytesIO(candidate_bytes))
    outcome = self.compute_metric(reference_image, candidate_image, prompt)
    self.scores.append(outcome.score)
    return {self.name: outcome.score}

__init__(name)

Base class for Image Quality Metrics.

Parameters:

Name Type Description Default
name str

Name of the metric.

required
Source code in hemm/metrics/image_quality/base.py
def __init__(self, name: str) -> None:
    """Set up the state shared by every image quality metric.

    Args:
        name (str): Name of the metric.
    """
    super().__init__()
    # Start with an empty score history and an empty config.
    self.scores, self.name, self.config = [], name, {}

compute_metric(ground_truth_pil_image, generated_pil_image, prompt) abstractmethod

Compute the metric for the given images. This is an abstract method and must be overridden by the child class implementation.

Parameters:

Name Type Description Default
ground_truth_pil_image Image

Ground truth image in PIL format.

required
generated_pil_image Image

Generated image in PIL format.

required
prompt str

Prompt for the image generation.

required

Returns:

Name Type Description
ComputeMetricOutput ComputeMetricOutput

Output containing the metric score and ground truth image.

Source code in hemm/metrics/image_quality/base.py
@abstractmethod
def compute_metric(
    self,
    ground_truth_pil_image: Image.Image,
    generated_pil_image: Image.Image,
    prompt: str,
) -> ComputeMetricOutput:
    """Score a generated image against its ground truth. Abstract: every
    concrete metric must provide its own implementation.

    Args:
        ground_truth_pil_image (Image.Image): Ground truth image in PIL format.
        generated_pil_image (Image.Image): Generated image in PIL format.
        prompt (str): Prompt for the image generation.

    Returns:
        ComputeMetricOutput: Output containing the metric score and ground truth image.
    """

ComputeMetricOutput

Bases: BaseModel

Output of the metric computation function.

Source code in hemm/metrics/image_quality/base.py
class ComputeMetricOutput(BaseModel):
    """Output of the metric computation function."""

    # The metric value: a single float, or a dict of named float scores.
    score: Union[float, Dict[str, float]]
    # The ground truth image as a string; the metrics on this page fill it with
    # the result of `base64_encode_image`.
    ground_truth_image: str