# Source code for hed.schema.schema_validation.hed_id_validator

"""Validator for HED ID consistency across schema versions."""

from hed.schema.hed_cache import get_library_data
from semantic_version import Version
from hed.schema.hed_schema_io import load_schema_version
from hed.schema.hed_cache import get_hed_versions
from hed.schema.hed_schema_constants import HedKey
from hed.errors.error_types import SchemaAttributeErrors
from hed.errors.error_reporter import ErrorHandler


class HedIDValidator:
    """Support class to validate hedIds in schemas.

    On construction, an earlier version of each library named by the schema
    (and of the standard schema, when the schema has a ``with_standard``) is
    looked up and loaded so hedIds can be compared across versions.

    Attributes:
        hed_schema: The schema being validated.
        library_data (dict): Maps a library name ("" for the standard schema)
            to the data returned by ``get_library_data``. Its "id_range" entry
            is read as an inclusive ``(starting_id, ending_id)`` pair.
    """

    def __init__(self, hed_schema):
        """Load the previous schema version(s) and the per-library id data.

        Parameters:
            hed_schema(HedSchemaBase): The schema we're validating.
                It uses this to derive the version number(s) of the previous schema.
        """
        self.hed_schema = hed_schema
        self._previous_schemas = {}
        self.library_data = {}

        # version_number and library are comma-separated and paired positionally.
        versions = self.hed_schema.version_number.split(",")
        libraries = self.hed_schema.library.split(",")

        prev_versions = {}
        for version, library in zip(versions, libraries, strict=False):
            prev_version = self._get_previous_version(version, library)
            if prev_version:
                prev_versions[library] = prev_version
            library_data = get_library_data(library)
            if library_data:
                self.library_data[library] = library_data

        # Add the standard schema if we have a with_standard
        if "" not in prev_versions and self.hed_schema.with_standard:
            prev_version = self._get_previous_version(self.hed_schema.with_standard, "")
            if prev_version:
                prev_versions[""] = prev_version
            library_data = get_library_data("")
            if library_data:
                # Reuse the value just fetched rather than looking it up a second time.
                self.library_data[""] = library_data

        if prev_versions:
            self._previous_schemas = {
                library: load_schema_version(full_version)
                for library, full_version in prev_versions.items()
            }

    @staticmethod
    def _get_previous_version(version, library):
        """Return the name of an earlier version of the given schema, or None.

        The versions reported by ``get_hed_versions`` are scanned in the order
        given and the first one lower than ``version`` is returned.
        NOTE(review): this presumes the list is ordered newest first - confirm.

        Parameters:
            version (str): The semantic version of the current schema.
            library (str): The library name (empty string for standard schema).

        Returns:
            str or None: The earlier version, prefixed with "<library>_" when a
                library is given, or None if no lower version is listed.
        """
        current_version = Version(version)
        all_schema_versions = get_hed_versions(library_name=library, check_prerelease=False)
        for old_version in all_schema_versions:
            if Version(old_version) < current_version:
                prev_version = old_version
                if library:
                    prev_version = f"{library}_{prev_version}"
                return prev_version
        return None

    def _get_old_id_int(self, tag_entry, tag_library):
        """Returns the integer hedId from the previous schema version for this entry, or None.

        Parameters:
            tag_entry (HedSchemaEntry): The schema entry being validated.
            tag_library (str): The library prefix for this entry (empty string for standard schema).

        Returns:
            int or None: The previous hedId as an integer, or None if absent or unparsable.
        """
        previous_schema = self._previous_schemas.get(tag_library)
        if not previous_schema:
            return None

        old_entry = previous_schema.get_tag_entry(tag_entry.name, key_class=tag_entry.section_key)
        if not old_entry:
            return None

        old_id_str = old_entry.attributes.get(HedKey.HedID)
        if not old_id_str:
            return None

        try:
            return int(old_id_str.removeprefix("HED_"))
        except ValueError:
            # Silently ignore invalid old_id values (shouldn't happen in practice)
            return None

    def verify_tag_id(self, hed_schema, tag_entry, attribute_name):
        """Validates the hedID attribute values

        This follows the template from schema_attribute_validators.py

        An issue is reported when the id is not "HED_" followed by an integer,
        when it differs from the id the same entry had in the previous schema
        version (including when the id has been dropped), or when it falls
        outside the id range known for the entry's library.

        Parameters:
            hed_schema (HedSchema): The schema to use for validation
            tag_entry (HedSchemaEntry): The schema entry for this tag.
            attribute_name (str): The name of this attribute.

        Returns:
            issues(list): A list of issues from validating this attribute.
        """
        # todo: If you have a way to know the schema should have 100% ids, you could check for that and flag missing
        new_id_str = tag_entry.attributes.get(attribute_name, "")
        tag_library = tag_entry.has_attribute(HedKey.InLibrary, return_value=True) or ""
        old_id = self._get_old_id_int(tag_entry, tag_library)

        new_id = None
        if new_id_str:
            try:
                new_id = int(new_id_str.removeprefix("HED_"))
            except ValueError:
                # An unparsable id cannot be compared any further; report it alone.
                return ErrorHandler.format_error(
                    SchemaAttributeErrors.SCHEMA_HED_ID_INVALID, tag_entry.name, new_id_str
                )

        # Nothing to verify
        if not new_id_str and old_id is None:
            return []

        issues = []
        # Compare against None explicitly: "no previous id" is signalled by None,
        # so a previous id of 0 must still be checked.
        if old_id is not None and old_id != new_id:
            issues += ErrorHandler.format_error(
                SchemaAttributeErrors.SCHEMA_HED_ID_INVALID, tag_entry.name, new_id, old_id=old_id
            )

        library_data = self.library_data.get(tag_library)
        if library_data and new_id is not None:
            starting_id, ending_id = library_data["id_range"]
            if new_id < starting_id or new_id > ending_id:
                issues += ErrorHandler.format_error(
                    SchemaAttributeErrors.SCHEMA_HED_ID_INVALID,
                    tag_entry.name,
                    new_id,
                    valid_min=starting_id,
                    valid_max=ending_id,
                )

        return issues