Source code for hedvis.generators.word_cloud

"""Utilities for creating a word cloud."""

import numpy as np
from PIL import Image
from hed.errors.exceptions import HedFileError
from hedvis.generators import word_cloud_util
from wordcloud import WordCloud

MIN_WORD_CLOUD_SIZE = 100


[docs] def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400, height=300, **kwargs): """Takes a word dict and returns a generated word cloud object. Parameters: word_dict (dict): words and their frequencies mask_path (str or None): The path of the mask file background_color (str or None): If None, transparent background. width (int): width in pixels. height (int): height in pixels. kwargs (kwargs): Any other parameters WordCloud accepts, overrides default values where relevant. Returns: WordCloud: The generated cloud. (Use .to_file to save it out as an image.) :raises ValueError: An empty dictionary was passed """ mask_image = None if mask_path: mask_image = load_and_resize_mask(mask_path, width, height) width = round(mask_image.shape[1]) height = round(mask_image.shape[0]) if height is None and width is None: width = 400 height = 300 elif height is None: height = round(width / 1.5) elif width is None: width = round(height * 1.5) width = max(width, MIN_WORD_CLOUD_SIZE) height = max(height, MIN_WORD_CLOUD_SIZE) kwargs.setdefault("contour_width", 3) kwargs.setdefault("contour_color", "black") kwargs.setdefault("prefer_horizontal", 0.75) kwargs.setdefault("color_func", word_cloud_util.default_color_func) kwargs.setdefault("relative_scaling", 1) kwargs.setdefault("max_font_size", max(round(height / 20), 12)) kwargs.setdefault("min_font_size", 8) if "font_path" not in kwargs: kwargs["font_path"] = None elif kwargs["font_path"] and not kwargs["font_path"].lower().endswith((".ttf", ".otf", ".ttc")): raise HedFileError("InvalidFontPath", f"Font {kwargs['font_path']} not valid on this system", "") wc = WordCloud(background_color=background_color, mask=mask_image, width=width, height=height, mode="RGBA", **kwargs) wc.generate_from_frequencies(word_dict) return wc
[docs] def word_cloud_to_svg(wc): """Return a WordCould as an SVG string. Parameters: wc (WordCloud): the word cloud object. Returns: str: The svg for the word cloud. """ svg_string = wc.to_svg() svg_string = svg_string.replace("fill:", "fill:rgb") svg_string = svg_string.replace("</svg>", word_cloud_util.generate_contour_svg(wc, wc.width, wc.height) + "</svg>") return svg_string
[docs] def load_and_resize_mask(mask_path, width=None, height=None): """Load a mask image and resize it according to given dimensions. The image is resized maintaining aspect ratio if only width or height is provided. Returns None if no mask_path. Parameters: mask_path (str): The path to the mask image file. width (int, optional): The desired width of the resized image. If only width is provided, the image is scaled to maintain its original aspect ratio. Defaults to None. height (int, optional): The desired height of the resized image. If only height is provided, the image is scaled to maintain its original aspect ratio. Defaults to None. Returns: numpy.ndarray: The loaded and processed mask image as a numpy array with binary values (0 or 255). """ if mask_path: mask_image = Image.open(mask_path).convert("RGBA") if width or height: original_size = np.array((mask_image.width, mask_image.height)) output_size = np.array((width, height)) # Handle one missing param if not height: scale = original_size[0] / width output_size = original_size / scale elif not width: scale = original_size[1] / height output_size = original_size / scale mask_image = mask_image.resize(tuple(output_size.astype(int)), Image.LANCZOS) mask_image_array = np.array(mask_image) # Treat transparency (alpha < 128) or white (R>127, G>127, B>127) as white, else black mask_image_array = np.where( (mask_image_array[:, :, 3] < 128) | ((mask_image_array[:, :, 0] > 127) & (mask_image_array[:, :, 1] > 127) & (mask_image_array[:, :, 2] > 127)), 255, 0, ) return mask_image_array.astype(np.uint8)