Source code for hedvis.generators.word_cloud

"""Utilities for creating a word cloud."""

import numpy as np
from PIL import Image
from hed.errors.exceptions import HedFileError
from hedvis.generators import word_cloud_util
from wordcloud import WordCloud

MIN_WORD_CLOUD_SIZE = 100



[docs]
def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400, height=300, **kwargs):
    """Build a WordCloud from a word-frequency dictionary.

    When a mask is supplied, the mask is loaded (and resized to the requested width/height) and
    the cloud dimensions are then taken from the resulting mask array. Missing dimensions are
    filled in using a 1.5:1 width-to-height ratio, and both are raised to at least
    MIN_WORD_CLOUD_SIZE.

    Parameters:
        word_dict (dict): words and their frequencies
        mask_path (str or None): The path of the mask file
        background_color (str or None): If None, transparent background.
        width (int or None): width in pixels.
        height (int or None): height in pixels.
        kwargs (kwargs): Any other parameters WordCloud accepts, overrides default values where relevant.

    Returns:
        WordCloud: The generated cloud. (Use .to_file to save it out as an image.)

    :raises ValueError:
        An empty dictionary was passed
    :raises HedFileError:
        A font_path was given that does not end in .ttf, .otf, or .ttc
    """
    mask_image = load_and_resize_mask(mask_path, width, height) if mask_path else None
    if mask_path:
        # The mask array is indexed (row, column), i.e. (height, width).
        mask_rows, mask_cols = mask_image.shape[0], mask_image.shape[1]
        width, height = round(mask_cols), round(mask_rows)

    # Fill in whichever dimension is still unknown.
    if width is None and height is None:
        width, height = 400, 300
    elif height is None:
        height = round(width / 1.5)
    elif width is None:
        width = round(height * 1.5)

    width = max(width, MIN_WORD_CLOUD_SIZE)
    height = max(height, MIN_WORD_CLOUD_SIZE)

    # Defaults only apply to options the caller did not pass explicitly.
    default_options = {
        "contour_width": 3,
        "contour_color": "black",
        "prefer_horizontal": 0.75,
        "color_func": word_cloud_util.default_color_func,
        "relative_scaling": 1,
        "max_font_size": max(round(height / 20), 12),
        "min_font_size": 8,
        "font_path": None,
    }
    for option_name, option_value in default_options.items():
        kwargs.setdefault(option_name, option_value)

    # Only the file extension of a supplied font path is checked here.
    font_path = kwargs["font_path"]
    if font_path and not font_path.lower().endswith((".ttf", ".otf", ".ttc")):
        raise HedFileError("InvalidFontPath", f"Font {font_path} not valid on this system", "")

    cloud = WordCloud(
        background_color=background_color,
        mask=mask_image,
        width=width,
        height=height,
        mode="RGBA",
        **kwargs,
    )
    cloud.generate_from_frequencies(word_dict)
    return cloud




[docs]
def word_cloud_to_svg(wc):
    """Return a WordCloud as an SVG string.

    The SVG produced by the word cloud is post-processed in two steps: every "fill:" is
    rewritten to "fill:rgb", and the contour markup from word_cloud_util is inserted
    just before the closing </svg> tag.

    Parameters:
        wc (WordCloud): the word cloud object.

    Returns:
       str: The svg for the word cloud.

    """
    base_svg = wc.to_svg()
    # NOTE(review): presumably the fill values are emitted as bare "(r, g, b)" tuples,
    # so prefixing "rgb" makes them valid SVG colors -- confirm against the color function.
    recolored_svg = base_svg.replace("fill:", "fill:rgb")
    contour_markup = word_cloud_util.generate_contour_svg(wc, wc.width, wc.height)
    return recolored_svg.replace("</svg>", contour_markup + "</svg>")




[docs]
def load_and_resize_mask(mask_path, width=None, height=None):
    """Load a mask image, optionally resize it, and convert it to a binary mask.

        If both width and height are given the image is resized to exactly that size.
        If only one of them is given, the other is scaled to keep the original aspect ratio.
        If neither is given the image keeps its original size.

        Returns None if no mask_path.

    Parameters:
        mask_path (str): The path to the mask image file.
        width (int, optional): The desired width of the resized image. If only width is provided,
            the image is scaled to maintain its original aspect ratio. Defaults to None.
        height (int, optional): The desired height of the resized image. If only height is provided,
            the image is scaled to maintain its original aspect ratio. Defaults to None.

    Returns:
        numpy.ndarray or None: A 2D uint8 array with binary values (0 or 255), or None if
            mask_path is empty or None.
    """
    if not mask_path:
        return None

    # convert() produces an independent, fully loaded image, so the file handle
    # can be released as soon as the conversion is done.
    with Image.open(mask_path) as source_image:
        mask_image = source_image.convert("RGBA")

    if width or height:
        if width and height:
            target_width, target_height = width, height
        elif width:
            # Keep the requested width exactly; only the height is derived from the scale.
            scale = mask_image.width / width
            target_width, target_height = width, mask_image.height / scale
        else:
            # Keep the requested height exactly; only the width is derived from the scale.
            scale = mask_image.height / height
            target_width, target_height = mask_image.width / scale, height

        # Truncate to whole pixels, but never below 1: a zero-sized dimension
        # (extreme aspect ratios) is rejected by Image.resize.
        output_size = tuple(max(int(extent), 1) for extent in (target_width, target_height))
        mask_image = mask_image.resize(output_size, Image.LANCZOS)

    pixels = np.array(mask_image)
    # Treat transparency (alpha < 128) or white (R>127, G>127, B>127) as white, else black
    is_transparent = pixels[:, :, 3] < 128
    is_light = (pixels[:, :, 0] > 127) & (pixels[:, :, 1] > 127) & (pixels[:, :, 2] > 127)
    binary_mask = np.where(is_transparent | is_light, 255, 0)

    return binary_mask.astype(np.uint8)