Source code for hed.validator.util.dup_util

"""Utility checker that detects duplicate tags and groups within a HED annotation."""

from typing import Union

from hed.errors.error_reporter import ErrorHandler
from hed.models.hed_tag import HedTag
from hed.errors.error_types import ValidationErrors


class DuplicateChecker:
    """Detects duplicate tags and groups within a HED annotation.

    Notes:
        - The checker uses an early-out strategy: traversal stops as soon as
          the first duplicate is found.
        - Children are compared by hash only, so two distinct children whose
          hashes collide would be reported as duplicates.
    """

    def __init__(self) -> None:
        """Create a checker with an empty issues list."""
        # Errors produced by the most recent check; reset by the public methods.
        self.issues = []

    def check_for_duplicates(self, group) -> list[dict]:
        """Find duplicates in a HED group and return the errors found.

        Parameters:
            group (HedGroup): The HED group to be checked.

        Returns:
            list: List of validation issues -- which might be empty if no
                duplicates detected.
        """
        self.issues = []
        self._get_recursive_hash(group)
        return self.issues

    def get_hash(self, group) -> Union[int, None]:
        """Return the hash for the group as long as it contains no duplicates.

        Parameters:
            group (HedGroup): The HED group to be checked.

        Returns:
            Union[int, None]: Hash of the group, or None if duplicates were
                detected within the group.

        Note:
            As a side effect, ``self.issues`` is reset on entry; if a
            duplicate is detected it holds the corresponding error afterwards.
        """
        self.issues = []
        return self._get_recursive_hash(group)

    def _get_recursive_hash(self, group) -> Union[int, None]:
        """Compute an order-independent hash for a group, recording duplicates.

        Each ``HedTag`` child contributes ``hash(child)``; every other child is
        treated as a nested group and contributes its own recursive hash. A
        repeated hash among the children of one group is recorded in
        ``self.issues`` as a duplication error.

        Parameters:
            group: The HED group to process; only its ``children`` are read.

        Returns:
            int | None: Hash of the frozen set of child hashes, or None if an
                issue was already present or is detected during traversal.
        """
        # Early out: a previous step already reported a duplicate.
        if self.issues:
            return None

        seen_hashes = set()
        for child in group.children:
            if isinstance(child, HedTag):
                this_hash = hash(child)
            else:
                this_hash = self._get_recursive_hash(child)

            # A nested group reported a duplicate; propagate the failure.
            if self.issues or this_hash is None:
                return None

            if this_hash in seen_hashes:
                self.issues += self._get_duplication_error(child)
                return None
            seen_hashes.add(this_hash)

        # frozenset makes the result independent of child order.
        return hash(frozenset(seen_hashes))

    @staticmethod
    def _get_duplication_error(child) -> list[dict]:
        """Build the error entries for a duplicated tag or group.

        Parameters:
            child: The duplicated child -- a ``HedTag`` or a group.

        Returns:
            list[dict]: The result of ``ErrorHandler.format_error`` for either
                ``HED_TAG_REPEATED`` or ``HED_TAG_REPEATED_GROUP``.
        """
        if isinstance(child, HedTag):
            return ErrorHandler.format_error(ValidationErrors.HED_TAG_REPEATED, child)

        # NOTE(review): the unwrapping below only runs when `child` is a plain
        # list; presumably kept for list-based group representations --
        # confirm whether it is still reachable.
        found_group = child
        base_steps_up = 0
        while isinstance(found_group, list):
            found_group = found_group[0]
            base_steps_up += 1
        # Climb back up as many levels as were unwrapped above.
        for _ in range(base_steps_up):
            found_group = found_group._parent
        return ErrorHandler.format_error(ValidationErrors.HED_TAG_REPEATED_GROUP, found_group)