Source code for ndx_hed.utils.hed_nwb_validator

"""
HedNWBValidator class for validating HED tags in NWB DynamicTable objects.
"""

import io
import json
import math
from typing import List, Dict, Any, Optional
from pynwb import NWBFile
from pynwb.core import DynamicTable
from ndx_events import EventsTable
from hed.errors import ErrorHandler, ErrorContext, HedExceptions, HedFileError
from hed.errors.error_reporter import check_for_any_errors
from hed.models import HedString, TabularInput
from ..hed_lab_metadata import HedLabMetaData
from ..hed_tags import HedTags, HedValueVector
from .bids2nwb import get_bids_events


class HedNWBValidator:
    """
    A validator class for HED tags in NWB DynamicTable objects.

    This class provides methods to validate HED tags in various NWB data structures
    using HED schema information stored in HedLabMetaData.

    All ``validate_*`` methods accept an optional ErrorHandler. Every error context a
    method pushes onto that handler is popped again before the method exits, including
    when validation raises, so a caller-supplied handler can be reused afterwards.
    """

    def __init__(self, hed_metadata: HedLabMetaData):
        """
        Initialize the HedNWBValidator with HED metadata.

        Parameters:
            hed_metadata (HedLabMetaData): The HED lab metadata containing schema information.
                Must be a valid HedLabMetaData instance with a loaded HED schema.

        Raises:
            ValueError: If hed_metadata is not an instance of HedLabMetaData

        Notes:
            HedLabMetaData validates the schema during its own construction, so if a
            HedLabMetaData instance exists, it is guaranteed to have a valid HED schema
            and version. No additional validation is needed here.
        """
        if not isinstance(hed_metadata, HedLabMetaData):
            raise ValueError("hed_metadata must be an instance of HedLabMetaData")
        self.hed_schema = hed_metadata.get_hed_schema()
        self.def_dict = hed_metadata.get_definition_dict()

    @staticmethod
    def _is_missing(value: Any) -> bool:
        """Return True if a cell holds no annotation: None, "", "n/a", or a float NaN."""
        if value is None:
            return True
        if isinstance(value, float) and math.isnan(value):
            return True
        if value == "" or value == "n/a":
            return True
        return False

    def validate_table(self, table: DynamicTable, error_handler: Optional[ErrorHandler] = None) -> List[Dict[str, Any]]:
        """
        Validates all HedTags and HedValueVector columns in a DynamicTable.

        Parameters:
            table (DynamicTable): The dynamic table to validate
            error_handler (ErrorHandler, optional): An ErrorHandler instance for collecting errors.
                If None, a new instance (with warnings disabled) will be created.

        Returns:
            List[Dict[str, Any]]: A consolidated list of validation issues from all HED columns

        Raises:
            ValueError: If table is not a DynamicTable instance
        """
        # isinstance(None, ...) is False, so this also rejects None.
        if not isinstance(table, DynamicTable):
            raise ValueError("The provided table is not a valid DynamicTable instance.")
        if error_handler is None:
            error_handler = ErrorHandler(check_for_warnings=False)

        issues = []
        # TODO: FILE_NAME context needs to be replaced by TABLE context when available in hed-python
        error_handler.push_error_context(ErrorContext.FILE_NAME, table.name)
        try:
            for col in table.columns:
                # HedTags is tested first, matching the original branch order.
                if isinstance(col, HedTags):
                    validate_column = self.validate_vector
                elif isinstance(col, HedValueVector):
                    validate_column = self.validate_value_vector
                else:
                    continue  # Not a HED column; nothing to validate.
                error_handler.push_error_context(ErrorContext.COLUMN, col.name)
                try:
                    issues += validate_column(col, error_handler)
                finally:
                    # Keep the context stack balanced even if validation raises.
                    error_handler.pop_error_context()
        finally:
            error_handler.pop_error_context()
        return issues

    def validate_vector(self, hed_tags: HedTags, error_handler: Optional[ErrorHandler] = None) -> List[Dict[str, Any]]:
        """
        Validates a HedTags column using the validator's HED schema and definitions.

        Cells that are None, empty, "n/a", or a float NaN are treated as missing and skipped.

        Parameters:
            hed_tags (HedTags): The HedTags column to validate
            error_handler (ErrorHandler, optional): An ErrorHandler instance for collecting errors.
                If None, a new instance (with warnings disabled) will be created.

        Returns:
            List[Dict[str, Any]]: A list of validation issues found in the HedTags column

        Raises:
            ValueError: If hed_tags is not a HedTags instance
        """
        if not isinstance(hed_tags, HedTags):
            raise ValueError("The provided hed_tags is not a valid HedTags instance.")
        if error_handler is None:
            error_handler = ErrorHandler(check_for_warnings=False)

        issues = []
        for index, tag in enumerate(hed_tags.data):
            if self._is_missing(tag):
                continue
            error_handler.push_error_context(ErrorContext.ROW, index)
            try:
                hed_obj = HedString(tag, self.hed_schema, def_dict=self.def_dict)
                issues += hed_obj.validate(allow_placeholders=False, error_handler=error_handler)
            finally:
                # Keep the context stack balanced even if parsing/validation raises.
                error_handler.pop_error_context()
        return issues

    def validate_value_vector(
        self, hed_values: HedValueVector, error_handler: Optional[ErrorHandler] = None
    ) -> List[Dict[str, Any]]:
        """
        Validates a HedValueVector column using the validator's HED schema and definitions.

        The column's HED template (``hed_values.hed``) is validated first with placeholders
        allowed. If the template has errors, those issues are returned immediately and the
        individual values are not checked. Otherwise each non-missing value is substituted
        for ``#`` in the template and the resulting string is validated.

        Parameters:
            hed_values (HedValueVector): The HedValueVector column to validate
            error_handler (ErrorHandler, optional): An ErrorHandler instance for collecting errors.
                If None, a new instance (with warnings disabled) will be created.

        Returns:
            List[Dict[str, Any]]: A list of validation issues found in the HedValueVector column

        Raises:
            ValueError: If hed_values is not a HedValueVector instance or has no HED template
        """
        if not isinstance(hed_values, HedValueVector) or hed_values.hed is None:
            raise ValueError("The provided hed_values is not a valid HedValueVector instance.")
        if error_handler is None:
            error_handler = ErrorHandler(check_for_warnings=False)

        issues = []
        # Validate the HED template first
        hed_template = HedString(hed_values.hed, self.hed_schema, def_dict=self.def_dict)
        issues += hed_template.validate(allow_placeholders=True, error_handler=error_handler)
        if check_for_any_errors(issues):
            return issues

        for index, tag in enumerate(hed_values.data):
            if self._is_missing(tag):
                continue
            error_handler.push_error_context(ErrorContext.ROW, index)
            try:
                # Substitute the tag value into the template in place of #
                eval_tag = hed_values.hed.replace("#", str(tag))
                hed_obj = HedString(eval_tag, self.hed_schema, def_dict=self.def_dict)
                issues += hed_obj.validate(allow_placeholders=False, error_handler=error_handler)
            finally:
                # Keep the context stack balanced even if parsing/validation raises.
                error_handler.pop_error_context()
        return issues

    def validate_events(
        self, events: EventsTable, error_handler: Optional[ErrorHandler] = None
    ) -> List[Dict[str, Any]]:
        """
        Validates HED tags in an EventsTable by converting it to BIDS format.

        The table is converted with get_bids_events() into a DataFrame and sidecar data.
        Non-empty sidecar data is serialized to JSON and supplied as the sidecar of a
        TabularInput, which is then validated against the validator's schema and definitions.

        Parameters:
            events (EventsTable): The EventsTable to validate containing HED tags
            error_handler (ErrorHandler, optional): An ErrorHandler instance for collecting errors.
                If None, a new instance (with warnings disabled) will be created.

        Returns:
            List[Dict[str, Any]]: A list of validation issues found in the EventsTable HED tags

        Raises:
            ValueError: If events is not an EventsTable instance
        """
        if not isinstance(events, EventsTable):
            raise ValueError("The provided events is not a valid EventsTable instance.")
        if error_handler is None:
            error_handler = ErrorHandler(check_for_warnings=False)

        # Convert EventsTable to BIDS format using get_bids_events
        df, json_data = get_bids_events(events)
        # An empty/None sidecar means there is nothing to pass along.
        sidecar = io.StringIO(json.dumps(json_data)) if json_data else None
        tab_input = TabularInput(file=df, sidecar=sidecar, name=events.name)
        return tab_input.validate(self.hed_schema, extra_def_dicts=self.def_dict, error_handler=error_handler)

    def validate_file(self, nwbfile: NWBFile, error_handler: Optional[ErrorHandler] = None) -> List[Dict[str, Any]]:
        """
        Validates all HED tags in an NWB file by iterating through all DynamicTable objects.

        The file must contain HedLabMetaData under the "hed_schema" lab metadata key, and
        its schema version must equal the validator's schema version. Each DynamicTable
        returned by ``nwbfile.all_children()`` is then validated: EventsTable objects with
        validate_events and all other DynamicTable objects with validate_table.

        Parameters:
            nwbfile (NWBFile): The NWB file to validate
            error_handler (ErrorHandler, optional): An ErrorHandler instance for collecting errors.
                If None, a new instance (with warnings disabled) will be created.

        Returns:
            List[Dict[str, Any]]: A consolidated list of validation issues from all tables in the file

        Raises:
            ValueError: If nwbfile is not a valid NWBFile instance
            HedFileError: If HedLabMetaData is missing or invalid in the NWB file
            HedFileError: If the HED schema version in the NWB file does not match the
                validator's schema version
        """
        if not isinstance(nwbfile, NWBFile):
            raise ValueError("The provided nwbfile is not a valid NWBFile instance.")

        # Check if HedLabMetaData is defined in the file and matches the validator's schema version
        hed_metadata = nwbfile.lab_meta_data.get("hed_schema")
        if not isinstance(hed_metadata, HedLabMetaData):
            raise HedFileError(
                HedExceptions.SCHEMA_INVALID, f"NWB file {nwbfile.identifier} does not have a valid HED schema", ""
            )
        file_version = hed_metadata.get_hed_schema_version()
        if file_version != self.hed_schema.version:
            raise HedFileError(
                HedExceptions.SCHEMA_VERSION_INVALID,
                f"HED schema version in NWB file ({file_version})"
                f" does not match validator schema version ({self.hed_schema.version})",
                "",
            )

        if error_handler is None:
            error_handler = ErrorHandler(check_for_warnings=False)

        issues = []
        error_handler.push_error_context(ErrorContext.FILE_NAME, nwbfile.identifier)
        try:
            # Validate DynamicTable objects in the NWB file
            for obj in nwbfile.all_children():
                if not isinstance(obj, DynamicTable):
                    continue
                # EventsTable is tested before falling back to generic table validation.
                if isinstance(obj, EventsTable):
                    issues.extend(self.validate_events(obj, error_handler))
                else:
                    issues.extend(self.validate_table(obj, error_handler))
        finally:
            # Keep the context stack balanced even if a table validator raises.
            error_handler.pop_error_context()
        return issues