Skip to content

Validator

Validation tools and error handling for HED strings and schemas.

HedValidator

HedValidator

Top level validation of HED strings.

This module contains the HedValidator class, which is used to validate the tags in a HED string or a file. The file types include .tsv, .txt, and .xlsx. To get the validation issues after creating a HedValidator instance, call its validate() method.

Source code in hed/validator/hed_validator.py
class HedValidator:
    """ Schema-based validation of a single HED string.

    Validation runs in two stages. The basic stage (run_basic_checks) covers string-level character and
    formatting checks plus per-tag checks; the full-string stage (run_full_string_checks) covers group-level
    and onset/offset checks. validate() runs both, skipping the second stage when the first reports errors.
    """

    # Matches runs of two or more spaces/tabs/slashes, a leading slash, or a trailing slash inside a tag.
    pattern_doubleslash = re.compile(r"([ \t/]{2,}|^/|/$)")

    def __init__(self, hed_schema, def_dicts=None, definitions_allowed=False):
        """ Create the helper validators used by this object.

        Parameters:
            hed_schema (HedSchema or HedSchemaGroup): The schema to validate against.
            def_dicts (DefinitionDict or list or dict): Definitions passed on to the definition validator.
            definitions_allowed (bool): When False, Definition tags found in a string are reported as errors.

        Raises:
            ValueError: If hed_schema is None.
        """
        if hed_schema is None:
            raise ValueError("HedSchema required for validation")

        self._hed_schema = hed_schema
        self._def_validator = DefValidator(def_dicts, hed_schema)
        self._definitions_allowed = definitions_allowed

        # The schema supplies the flag that both character-aware helpers receive.
        modern_rules = hed_schema.schema_83_props
        self._validate_characters = modern_rules
        self._unit_validator = UnitValueValidator(modern_allowed_char_rules=modern_rules)
        self._char_validator = CharRexValidator(modern_allowed_char_rules=modern_rules)

        self._string_validator = StringValidator()
        self._tag_validator = TagValidator()
        self._group_validator = GroupValidator(hed_schema)

    def validate(self, hed_string, allow_placeholders, error_handler=None) -> list[dict]:
        """ Run both validation stages on a HED string.

        Parameters:
            hed_string (HedString): The string to validate.
            allow_placeholders (bool): Whether placeholders are permitted in the string.
            error_handler (ErrorHandler or None): Handler used to add context and filter issues.
                                                  A default one is created when a falsy value is passed.

        Returns:
            list[dict]: The issues found. Full-string checks are skipped if the basic checks found errors.
        """
        handler = error_handler or error_reporter.ErrorHandler()
        found = []
        found.extend(self.run_basic_checks(hed_string, allow_placeholders=allow_placeholders))
        handler.add_context_and_filter(found)
        if not error_reporter.check_for_any_errors(found):
            # Only a string that passed the basic stage goes through the full-string stage.
            found.extend(self.run_full_string_checks(hed_string))
            handler.add_context_and_filter(found)
        return found

    def run_basic_checks(self, hed_string, allow_placeholders) -> list[dict]:
        """ Run the string-level and per-tag checks.

        Parameters:
            hed_string (HedString): The HED string to validate.
            allow_placeholders (bool): Whether placeholders are permitted in the string.

        Returns:
            list[dict]: The issues found. Each issue is a dictionary.

        Notes:
            - The string-level checks run first; if they report errors nothing else is run.
            - A string equal to "n/a" is not checked further.
            - Tag character checks and canonical-form conversion come next; if errors have been reported
              by then, the individual-tag and Def checks are skipped.

        """
        found = []
        found.extend(self._run_hed_string_validators(hed_string, allow_placeholders))
        if error_reporter.check_for_any_errors(found) or hed_string == "n/a":
            return found

        for current_tag in hed_string.get_all_tags():
            found.extend(self._run_validate_tag_characters(current_tag, allow_placeholders=allow_placeholders))
        found.extend(hed_string._calculate_to_canonical_forms(self._hed_schema))

        if not error_reporter.check_for_any_errors(found):
            found.extend(self._validate_individual_tags_in_hed_string(hed_string,
                                                                      allow_placeholders=allow_placeholders))
            found.extend(self._def_validator.validate_def_tags(hed_string, self))
        return found

    def run_full_string_checks(self, hed_string) -> list[dict]:
        """ Run the group-level and onset/offset checks, stopping at the first one that reports issues.

        Parameters:
            hed_string (HedString): The HED string to validate.

        Returns:
            list[dict]: The issues from the first check that returned any, or an empty list.

        Notes:
            - The checks run in order: all-tags validators, tag-level validators, onset/offset validation.
            - Later checks are not run once an earlier one has returned issues.

        """
        ordered_checks = (
            self._group_validator.run_all_tags_validators,
            self._group_validator.run_tag_level_validators,
            self._def_validator.validate_onset_offset,
        )
        for run_check in ordered_checks:
            if found := run_check(hed_string):
                return found
        return []

    # Todo: mark semi private/actually private below this
    def _run_validate_tag_characters(self, original_tag, allow_placeholders) -> list[dict]:
        """ Check one tag for invalid characters.

        Parameters:
            original_tag (HedTag): The tag to check.
            allow_placeholders (bool): Passed through to the character validator.

        Returns:
            list[dict]: The character issues for this tag. Each issue is a dictionary.

        """
        char_issues = self._char_validator.check_tag_invalid_chars(original_tag, allow_placeholders)
        return char_issues

    def _run_hed_string_validators(self, hed_string_obj, allow_placeholders=False) -> list[dict]:
        """ Run the checks that look at the string as a whole.

           These are the invalid-character check on the original text, the string validator,
           and the slash-formatting check on every tag.

        Parameters:
            hed_string_obj (HedString): A HED string.
            allow_placeholders (bool): Passed through to the invalid-character check.

        Returns:
            list[dict]: The issues found. Each issue is a dictionary.
         """
        raw_text = hed_string_obj.get_original_hed_string()
        found = []
        found.extend(self._char_validator.check_invalid_character_issues(raw_text, allow_placeholders))
        found.extend(self._string_validator.run_string_validator(hed_string_obj))
        for current_tag in hed_string_obj.get_all_tags():
            found.extend(self.check_tag_formatting(current_tag))
        return found

    def check_tag_formatting(self, original_tag) -> list[dict]:
        """ Report repeated, leading, or trailing slashes (and repeated blanks) in a tag.

        Parameters:
            original_tag (HedTag): The tag whose org_tag text is scanned; also used to report the error.

        Returns:
            list[dict]: One NODE_NAME_EMPTY issue per match of pattern_doubleslash.
        """
        slash_issues = []
        for hit in self.pattern_doubleslash.finditer(original_tag.org_tag):
            first, last = hit.span()
            slash_issues.extend(error_reporter.ErrorHandler.format_error(ValidationErrors.NODE_NAME_EMPTY,
                                                                         tag=original_tag,
                                                                         index_in_tag=first,
                                                                         index_in_tag_end=last))
        return slash_issues

    def validate_units(self, original_tag, validate_text=None, report_as=None, error_code=None,
                       index_offset=0) -> list[dict]:
        """ Validate the value portion of a tag against its unit class, value class, or extension rules.

        Parameters:
            original_tag (HedTag): The source tag.
            validate_text (str): The text to validate; defaults to the tag's full extension.
            report_as (HedTag): Tag to report issues against instead of original_tag (used for definitions).
            error_code (str): Error code override, passed to the unit class check only.
            index_offset (int): Offset of validate_text within the extension, used by the extension check.

        Returns:
            list[dict]: The issues found. A lone '#' placeholder is never reported.
        """
        text = original_tag.extension if validate_text is None else validate_text
        if text == '#':
            return []

        # Exactly one of the three checks applies, in this order of precedence.
        if original_tag.is_unit_class_tag():
            return list(self._unit_validator.check_tag_unit_class_units_are_valid(original_tag,
                                                                                  text,
                                                                                  report_as=report_as,
                                                                                  error_code=error_code))
        if original_tag.is_value_class_tag():
            return list(self._unit_validator.check_tag_value_class_valid(original_tag,
                                                                         text,
                                                                         report_as=report_as))
        if original_tag.extension:
            return list(self._char_validator.check_for_invalid_extension_chars(original_tag,
                                                                               text,
                                                                               index_offset=index_offset))
        return []

    def _validate_individual_tags_in_hed_string(self, hed_string_obj, allow_placeholders=False) -> list[dict]:
        """ Run the per-tag checks on every tag of every group in the string.

         Parameters:
            hed_string_obj (HedString): The string whose tags are checked.
            allow_placeholders (bool): Whether placeholders are permitted in the tags.

         Returns:
            list[dict]: The issues found for the individual tags. Each issue is a dictionary.

         """
        from hed.models.definition_dict import DefTagNames
        found = []

        # Collect every group that sits inside a top-level Definition group.
        top_definition_groups = hed_string_obj.find_top_level_tags(anchor_tags={DefTagNames.DEFINITION_KEY},
                                                                   include_groups=1)
        groups_in_definitions = []
        for definition_group in top_definition_groups:
            groups_in_definitions.extend(definition_group.get_all_groups())

        for current_group in hed_string_obj.get_all_groups():
            inside_definition = current_group in groups_in_definitions
            for hed_tag in current_group.tags():
                if not self._definitions_allowed and hed_tag.short_base_tag == DefTagNames.DEFINITION_KEY:
                    found.extend(error_reporter.ErrorHandler.format_error(
                        DefinitionErrors.BAD_DEFINITION_LOCATION, hed_tag))

                found.extend(self._tag_validator.run_individual_tag_validators(
                    hed_tag, allow_placeholders=allow_placeholders, is_definition=inside_definition))

                if hed_tag.short_base_tag in (DefTagNames.DEF_KEY, DefTagNames.DEF_EXPAND_KEY):
                    # Def and Def-expand values are validated by the definition validator.
                    found.extend(self._def_validator.validate_def_value_units(
                        hed_tag, self, allow_placeholders=allow_placeholders))
                elif hed_tag.short_base_tag == DefTagNames.DEFINITION_KEY and hed_tag.extension.endswith("/#"):
                    # Validate the definition name only, without the trailing "/#".
                    found.extend(self.validate_units(hed_tag, hed_tag.extension[:-2]))
                elif not allow_placeholders or '#' not in hed_tag.extension:
                    found.extend(self.validate_units(hed_tag))

        return found

check_tag_formatting

check_tag_formatting(original_tag) -> list[dict]

Report repeated or erroneous slashes.

Parameters:

Name Type Description Default
original_tag HedTag

The original tag that is used to report the error.

required

Returns:

Type Description
list[dict]

list[dict]: Validation issues. Each issue is a dictionary.

Source code in hed/validator/hed_validator.py
def check_tag_formatting(self, original_tag) -> list[dict]:
    """ Report repeated, leading, or trailing slashes (and repeated blanks) in a tag.

    Parameters:
        original_tag (HedTag): The tag whose org_tag text is scanned; also used to report the error.

    Returns:
        list[dict]: One NODE_NAME_EMPTY issue per match of pattern_doubleslash.
    """
    slash_issues = []
    for hit in self.pattern_doubleslash.finditer(original_tag.org_tag):
        first, last = hit.span()
        slash_issues.extend(error_reporter.ErrorHandler.format_error(ValidationErrors.NODE_NAME_EMPTY,
                                                                     tag=original_tag,
                                                                     index_in_tag=first,
                                                                     index_in_tag_end=last))
    return slash_issues

run_basic_checks

run_basic_checks(
    hed_string, allow_placeholders
) -> list[dict]

Run basic validation checks on a HED string.

Parameters:

Name Type Description Default
hed_string HedString

The HED string to validate.

required
allow_placeholders bool

Whether placeholders are allowed in the HED string.

required

Returns:

Type Description
list[dict]

list[dict]: A list of issues found during validation. Each issue is represented as a dictionary.

Notes
  • This method performs initial validation checks on the HED string, including character validation and tag validation.
  • It checks for invalid characters, calculates canonical forms, and validates individual tags.
  • If any issues are found during these checks, the method stops and returns the issues immediately.
  • The method also validates definition tags if applicable.
Source code in hed/validator/hed_validator.py
def run_basic_checks(self, hed_string, allow_placeholders) -> list[dict]:
    """ Run the string-level and per-tag checks.

    Parameters:
        hed_string (HedString): The HED string to validate.
        allow_placeholders (bool): Whether placeholders are permitted in the string.

    Returns:
        list[dict]: The issues found. Each issue is a dictionary.

    Notes:
        - The string-level checks run first; if they report errors nothing else is run.
        - A string equal to "n/a" is not checked further.
        - Tag character checks and canonical-form conversion come next; if errors have been reported
          by then, the individual-tag and Def checks are skipped.

    """
    found = []
    found.extend(self._run_hed_string_validators(hed_string, allow_placeholders))
    if error_reporter.check_for_any_errors(found) or hed_string == "n/a":
        return found

    for current_tag in hed_string.get_all_tags():
        found.extend(self._run_validate_tag_characters(current_tag, allow_placeholders=allow_placeholders))
    found.extend(hed_string._calculate_to_canonical_forms(self._hed_schema))

    if not error_reporter.check_for_any_errors(found):
        found.extend(self._validate_individual_tags_in_hed_string(hed_string,
                                                                  allow_placeholders=allow_placeholders))
        found.extend(self._def_validator.validate_def_tags(hed_string, self))
    return found

run_full_string_checks

run_full_string_checks(hed_string) -> list[dict]

Run all full-string validation checks on a HED string.

Parameters:

Name Type Description Default
hed_string HedString

The HED string to validate.

required

Returns:

Type Description
list[dict]

list[dict]: A list of issues found during validation. Each issue is represented as a dictionary.

Notes
  • This method iterates through a series of validation checks defined in the checks list.
  • Each check is a callable function that takes hed_string as input and returns a list of issues.
  • If any check returns issues, the method stops and returns those issues immediately.
  • If no issues are found, an empty list is returned.
Source code in hed/validator/hed_validator.py
def run_full_string_checks(self, hed_string) -> list[dict]:
    """ Run the group-level and onset/offset checks, stopping at the first one that reports issues.

    Parameters:
        hed_string (HedString): The HED string to validate.

    Returns:
        list[dict]: The issues from the first check that returned any, or an empty list.

    Notes:
        - The checks run in order: all-tags validators, tag-level validators, onset/offset validation.
        - Later checks are not run once an earlier one has returned issues.

    """
    ordered_checks = (
        self._group_validator.run_all_tags_validators,
        self._group_validator.run_tag_level_validators,
        self._def_validator.validate_onset_offset,
    )
    for run_check in ordered_checks:
        if found := run_check(hed_string):
            return found
    return []

validate

validate(
    hed_string, allow_placeholders, error_handler=None
) -> list[dict]

Validate the HED string object using the schema.

Parameters:

Name Type Description Default
hed_string HedString

the string to validate.

required
allow_placeholders bool

allow placeholders in the string.

required
error_handler ErrorHandler or None

the error handler to use, creates a default one if none passed.

None

Returns:

Type Description
list[dict]

list[dict]: A list of issues for HED string.

Source code in hed/validator/hed_validator.py
def validate(self, hed_string, allow_placeholders, error_handler=None) -> list[dict]:
    """ Run both validation stages on a HED string.

    Parameters:
        hed_string (HedString): The string to validate.
        allow_placeholders (bool): Whether placeholders are permitted in the string.
        error_handler (ErrorHandler or None): Handler used to add context and filter issues.
                                              A default one is created when a falsy value is passed.

    Returns:
        list[dict]: The issues found. Full-string checks are skipped if the basic checks found errors.
    """
    handler = error_handler or error_reporter.ErrorHandler()
    found = []
    found.extend(self.run_basic_checks(hed_string, allow_placeholders=allow_placeholders))
    handler.add_context_and_filter(found)
    if not error_reporter.check_for_any_errors(found):
        # Only a string that passed the basic stage goes through the full-string stage.
        found.extend(self.run_full_string_checks(hed_string))
        handler.add_context_and_filter(found)
    return found

validate_units

validate_units(
    original_tag,
    validate_text=None,
    report_as=None,
    error_code=None,
    index_offset=0,
) -> list[dict]

Validate units and value classes

Parameters:

Name Type Description Default
original_tag HedTag

The source tag

required
validate_text str

the text we want to validate, if not the full extension.

None
report_as HedTag

Report the error tag as coming from a different one. Mostly for definitions that expand.

None
error_code str

The code to override the error as. Again mostly for def/def-expand tags.

None
index_offset int

Offset into the extension validate_text starts at

0

Returns:

Type Description
list[dict]

list[dict]: Issues found from units

Source code in hed/validator/hed_validator.py
def validate_units(self, original_tag, validate_text=None, report_as=None, error_code=None,
                   index_offset=0) -> list[dict]:
    """ Validate the value portion of a tag against its unit class, value class, or extension rules.

    Parameters:
        original_tag (HedTag): The source tag.
        validate_text (str): The text to validate; defaults to the tag's full extension.
        report_as (HedTag): Tag to report issues against instead of original_tag (used for definitions).
        error_code (str): Error code override, passed to the unit class check only.
        index_offset (int): Offset of validate_text within the extension, used by the extension check.

    Returns:
        list[dict]: The issues found. A lone '#' placeholder is never reported.
    """
    text = original_tag.extension if validate_text is None else validate_text
    if text == '#':
        return []

    # Exactly one of the three checks applies, in this order of precedence.
    if original_tag.is_unit_class_tag():
        return list(self._unit_validator.check_tag_unit_class_units_are_valid(original_tag,
                                                                              text,
                                                                              report_as=report_as,
                                                                              error_code=error_code))
    if original_tag.is_value_class_tag():
        return list(self._unit_validator.check_tag_value_class_valid(original_tag,
                                                                     text,
                                                                     report_as=report_as))
    if original_tag.extension:
        return list(self._char_validator.check_for_invalid_extension_chars(original_tag,
                                                                           text,
                                                                           index_offset=index_offset))
    return []

Definition Validator

def_validator

Validation of Def, Def-expand, and Temporal groups.

DefValidator

Bases: DefinitionDict

Validates Def/ and Def-expand/, as well as Temporal groups: Onset, Inset, and Offset

Source code in hed/validator/def_validator.py
class DefValidator(DefinitionDict):
    """ Validates Def/ and Def-expand/, as well as Temporal groups: Onset, Inset, and Offset

    Definitions are looked up in self.defs using the case-folded definition name.
    """

    def __init__(self, def_dicts=None, hed_schema=None):
        """ Initialize for definitions in HED strings.

        Parameters:
            def_dicts (list or DefinitionDict or str): DefinitionDicts containing the definitions to pass to baseclass
            hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise.
        """
        super().__init__(def_dicts, hed_schema=hed_schema)

    def validate_def_tags(self, hed_string_obj, hed_validator=None) -> list[dict]:
        """ Validate Def/Def-Expand tags.

        Parameters:
            hed_string_obj (HedString): The HED string to process.
            hed_validator (HedValidator): Not used by this method.

        Returns:
            list[dict]: Issues found related to validating defs. Each issue is a dictionary.
        """
        # This is needed primarily to validate the contents of a def-expand matches the default.
        def_issues = []
        # We need to check for labels to expand in ALL groups
        for def_tag, def_expand_group, _ in hed_string_obj.find_def_tags(recursive=True):
            def_issues += self._validate_def_contents(def_tag, def_expand_group)

        return def_issues

    @staticmethod
    def _report_missing_or_invalid_value(def_tag, def_entry, is_def_expand_tag) -> list[dict]:
        """Returns the correct error for this type of def tag

        A definition that takes a value yields a "value missing" error; one that does not yields a
        "value extra" error. Def-expand tags use the Def-expand variant of either code.

        Parameters:
            def_tag(HedTag): The source tag
            def_entry(DefinitionEntry): The entry for this definition
            is_def_expand_tag(bool): If the given def_tag is a def-expand tag or not.

        Returns:
            list[dict]: Issues found from validating placeholders.
        """
        def_issues = []
        if def_entry.takes_value:
            error_code = ValidationErrors.HED_DEF_VALUE_MISSING
            if is_def_expand_tag:
                error_code = ValidationErrors.HED_DEF_EXPAND_VALUE_MISSING
        else:
            error_code = ValidationErrors.HED_DEF_VALUE_EXTRA
            if is_def_expand_tag:
                error_code = ValidationErrors.HED_DEF_EXPAND_VALUE_EXTRA
        def_issues += ErrorHandler.format_error(error_code, tag=def_tag)
        return def_issues

    def _validate_def_contents(self, def_tag, def_expand_group) -> list[dict]:
        """ Check for issues with expanding a tag from Def to a Def-expand tag group

        Parameters:
            def_tag (HedTag): Source HED tag that may be a Def or Def-expand tag.
            def_expand_group (HedGroup or HedTag): Source group for this def-expand tag.
                                                   Same as def_tag if this is not a def-expand tag.
        Returns:
            list[dict]: Issues found from validating placeholders.
        """
        is_def_expand_tag = def_expand_group != def_tag
        tag_label, _, placeholder = def_tag.extension.partition('/')
        label_tag_lower = tag_label.casefold()

        # Unmatched when the definition is unknown, or when it takes a value but none was given
        # (or it takes no value but one was given).
        def_entry = self.defs.get(label_tag_lower)
        if def_entry is None or def_entry.takes_value == (not placeholder):
            error_code = ValidationErrors.HED_DEF_UNMATCHED
            if is_def_expand_tag:
                error_code = ValidationErrors.HED_DEF_EXPAND_UNMATCHED
            return ErrorHandler.format_error(error_code, tag=def_tag)

        # For a Def-expand, the group found in the string must equal the expansion of the definition.
        def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder, return_copy_of_tag=True)
        if is_def_expand_tag and def_expand_group != def_contents:
            return ErrorHandler.format_error(ValidationErrors.HED_DEF_EXPAND_INVALID,
                                             tag=def_tag, actual_def=def_contents,
                                             found_def=def_expand_group)

        return []

    def validate_def_value_units(self, def_tag, hed_validator, allow_placeholders=False) -> list[dict]:
        """Equivalent to HedValidator.validate_units for the special case of a Def or Def-expand tag

        Parameters:
            def_tag (HedTag): The Def or Def-expand tag; its extension is split into name and value at the first '/'.
            hed_validator (HedValidator or None): Validator used for the name and value checks.
                                                  When falsy, both of those checks are skipped.
            allow_placeholders (bool): If False, a '#' in the value is reported as out of context.

        Returns:
            list[dict]: Issues found. Empty if the definition name is unknown (reported elsewhere as unmatched).
        """
        tag_label, _, placeholder = def_tag.extension.partition('/')
        is_def_expand_tag = def_tag.short_base_tag == DefTagNames.DEF_EXPAND_KEY

        def_entry = self.defs.get(tag_label.casefold())
        # These errors will be caught as can't match definition
        if def_entry is None:
            return []

        # Make sure that there aren't any errant placeholders.
        if not allow_placeholders and '#' in placeholder:
            return ErrorHandler.format_error(ValidationErrors.HED_PLACEHOLDER_OUT_OF_CONTEXT, tag=def_tag.tag)

        # Set the appropriate error code
        error_code = ValidationErrors.DEF_INVALID
        if is_def_expand_tag:
            error_code = ValidationErrors.DEF_EXPAND_INVALID

        # Accumulate into a fresh list so the list returned by the helper is never mutated.
        def_issues = []

        # Validate the def name vs the name class. Guarded the same way as the value check below,
        # so a missing validator skips the check instead of raising AttributeError.
        if hed_validator:
            def_issues += hed_validator._unit_validator._check_value_class(def_tag, tag_label, report_as=None)

        def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder, return_copy_of_tag=True)
        if def_contents and def_entry.takes_value and hed_validator:
            # Validate the supplied value using the tag that holds the placeholder in the definition.
            placeholder_tag = def_contents.get_first_group().find_placeholder_tag()
            def_issues += hed_validator.validate_units(placeholder_tag,
                                                       placeholder,
                                                       report_as=def_tag,
                                                       error_code=error_code,
                                                       index_offset=len(tag_label) + 1)

        return def_issues

    def validate_onset_offset(self, hed_string_obj) -> list[dict]:
        """ Validate onset/offset

        Parameters:
            hed_string_obj (HedString): The HED string to check.

        Returns:
            list[dict]: A list of issues found in validating onsets (i.e., out of order onsets, unknown def names).
        """
        onset_issues = []
        for found_onset, found_group in self._find_onset_tags(hed_string_obj):
            # NOTE(review): this returns an empty list and drops any issues gathered from earlier groups;
            # confirm whether `continue` was intended.
            if not found_onset:
                return []

            def_tags = found_group.find_def_tags()
            if not def_tags:
                onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_NO_DEF_TAG_FOUND, found_onset)
                continue

            if len(def_tags) > 1:
                onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_TOO_MANY_DEFS,
                                                          tag=def_tags[0][0],
                                                          tag_list=[tag[0] for tag in def_tags[1:]])
                continue

            # Get all children but def group and onset/offset, then validate #/type of children.
            def_tag, def_group, _ = def_tags[0]
            if def_group is None:
                def_group = def_tag
            children = [child for child in found_group.children if
                        def_group is not child and found_onset is not child]

            # Delay tag is checked for uniqueness elsewhere, so we can safely remove all of them
            children = [child for child in children
                        if not isinstance(child, HedTag) or child.short_base_tag != DefTagNames.DELAY_KEY]

            # An Offset group may have no other children; any other temporal tag allows one.
            max_children = 1
            if found_onset.short_base_tag == DefTagNames.OFFSET_KEY:
                max_children = 0
            if len(children) > max_children:
                onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_WRONG_NUMBER_GROUPS,
                                                          def_tag,
                                                          found_group.children)
                continue

            if children:
                # Make this a loop if max_children can be > 1
                child = children[0]
                if not isinstance(child, HedGroup):
                    onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_TAG_OUTSIDE_OF_GROUP,
                                                              child,
                                                              def_tag)

            # At this point we have either an onset or offset tag and its name
            onset_issues += self._handle_onset_or_offset(def_tag)

        return onset_issues

    @staticmethod
    def _find_onset_tags(hed_string_obj):
        """ Return the result of searching the string's top level for the temporal anchor tags.

        Parameters:
            hed_string_obj (HedString): The HED string to search.

        Returns:
            The value returned by find_top_level_tags for DefTagNames.TEMPORAL_KEYS
            (iterated by validate_onset_offset as (tag, group) pairs).
        """
        return hed_string_obj.find_top_level_tags(anchor_tags=DefTagNames.TEMPORAL_KEYS)

    def _handle_onset_or_offset(self, def_tag) -> list[dict]:
        """ Check that the Def tag of a temporal group names a known definition and matches its value use.

        Parameters:
            def_tag (HedTag): The Def or Def-expand tag found in the temporal group.

        Returns:
            list[dict]: ONSET_DEF_UNMATCHED if the definition is unknown, ONSET_PLACEHOLDER_WRONG if a value
                        is given but not taken (or taken but not given), otherwise an empty list.
        """
        def_name, _, placeholder = def_tag.extension.partition('/')

        def_entry = self.defs.get(def_name.casefold())
        if def_entry is None:
            return ErrorHandler.format_error(TemporalErrors.ONSET_DEF_UNMATCHED, tag=def_tag)
        if bool(def_entry.takes_value) != bool(placeholder):
            return ErrorHandler.format_error(TemporalErrors.ONSET_PLACEHOLDER_WRONG, tag=def_tag,
                                             has_placeholder=bool(def_entry.takes_value))

        return []

validate_def_tags

validate_def_tags(hed_string_obj, hed_validator=None)

Validate Def/Def-Expand tags.

Parameters:

Name Type Description Default
hed_string_obj HedString

The HED string to process.

required
hed_validator HedValidator

Used to validate the placeholder replacement.

None

Returns: list: Issues found related to validating defs. Each issue is a dictionary.

Source code in hed/validator/def_validator.py
def validate_def_tags(self, hed_string_obj, hed_validator=None):
    """ Validate Def/Def-Expand tags.

    Every def tag found anywhere in the string (the search is recursive) is passed, together
    with its def-expand group, to _validate_def_contents.

    Parameters:
        hed_string_obj (HedString): The HED string to process.
        hed_validator (HedValidator): Used to validate the placeholder replacement.
                                      Not referenced by this method's body.
    Returns:
        list: Issues found related to validating defs. Each issue is a dictionary.
    """
    # Primarily needed to confirm that the contents of a def-expand match the definition.
    collected = []
    # Labels must be checked in ALL groups, hence recursive=True.
    for tag, expand_group, _ in hed_string_obj.find_def_tags(recursive=True):
        collected.extend(self._validate_def_contents(tag, expand_group))

    return collected

validate_def_value_units

validate_def_value_units(
    def_tag, hed_validator, allow_placeholders=False
) -> list[dict]

Equivalent to HedValidator.validate_units for the special case of a Def or Def-expand tag

Source code in hed/validator/def_validator.py
def validate_def_value_units(self, def_tag, hed_validator, allow_placeholders=False) -> list[dict]:
    """Equivalent to HedValidator.validate_units for the special case of a Def or Def-expand tag.

    Parameters:
        def_tag (HedTag): The Def or Def-expand tag whose extension is "name" or "name/value".
        hed_validator (HedValidator): Validator whose unit checks are applied to the name and value.
        allow_placeholders (bool): If False, a '#' in the value part is reported as out of context.

    Returns:
        list[dict]: Issues found for the definition name and, when the definition takes a value,
                    for the supplied value.
    """
    label, _, value = def_tag.extension.partition('/')
    expanded = def_tag.short_base_tag == DefTagNames.DEF_EXPAND_KEY

    entry = self.defs.get(label.casefold())
    if entry is None:
        # An unknown name is reported elsewhere as an unmatched definition.
        return []

    # Reject stray placeholders before doing any further work.
    if '#' in value and not allow_placeholders:
        return ErrorHandler.format_error(ValidationErrors.HED_PLACEHOLDER_OUT_OF_CONTEXT, tag=def_tag.tag)

    # Issues on the value are reported under the code matching the kind of tag.
    error_code = ValidationErrors.DEF_EXPAND_INVALID if expanded else ValidationErrors.DEF_INVALID

    # Check the definition name itself against the value class rules.
    issues = hed_validator._unit_validator._check_value_class(def_tag, label, report_as=None)

    contents = entry.get_definition(def_tag, placeholder_value=value, return_copy_of_tag=True)
    if not (contents and entry.takes_value and hed_validator):
        return issues

    # Validate the supplied value as if it were attached to the definition's placeholder tag,
    # offsetting the index past "name/" so positions refer to the original def tag text.
    target_tag = contents.get_first_group().find_placeholder_tag()
    issues += hed_validator.validate_units(target_tag,
                                           value,
                                           report_as=def_tag,
                                           error_code=error_code,
                                           index_offset=len(label) + 1)
    return issues

validate_onset_offset

validate_onset_offset(hed_string_obj) -> list[dict]

Validate onset/offset

Parameters:

Name Type Description Default
hed_string_obj HedString

The HED string to check.

required

Returns:

Type Description
list[dict]

list[dict]: A list of issues found in validating onsets (i.e., out of order onsets, unknown def names).

Source code in hed/validator/def_validator.py
def validate_onset_offset(self, hed_string_obj) -> list[dict]:
    """ Validate the structure of each top-level temporal group.

    Each group must contain exactly one def tag, at most one additional child (none for an
    offset group, Delay tags excluded), and that child must be a group.

    Parameters:
        hed_string_obj (HedString): The HED string to check.

    Returns:
        list[dict]: A list of issues found in validating onsets (i.e., out of order onsets, unknown def names).
    """
    issues = []
    for anchor_tag, anchor_group in self._find_onset_tags(hed_string_obj):
        if not anchor_tag:
            return []

        found_defs = anchor_group.find_def_tags()
        if not found_defs:
            issues += ErrorHandler.format_error(TemporalErrors.ONSET_NO_DEF_TAG_FOUND, anchor_tag)
            continue

        if len(found_defs) > 1:
            issues += ErrorHandler.format_error(TemporalErrors.ONSET_TOO_MANY_DEFS,
                                                tag=found_defs[0][0],
                                                tag_list=[entry[0] for entry in found_defs[1:]])
            continue

        def_tag, def_group, _ = found_defs[0]
        if def_group is None:
            def_group = def_tag

        # Keep every child except the def (or its group), the anchor tag itself, and Delay tags.
        # Delay tags are checked for uniqueness elsewhere, so all of them can be dropped here.
        extras = [
            child for child in anchor_group.children
            if child is not def_group
            and child is not anchor_tag
            and (not isinstance(child, HedTag) or child.short_base_tag != DefTagNames.DELAY_KEY)
        ]

        allowed = 0 if anchor_tag.short_base_tag == DefTagNames.OFFSET_KEY else 1
        if len(extras) > allowed:
            issues += ErrorHandler.format_error(TemporalErrors.ONSET_WRONG_NUMBER_GROUPS,
                                                def_tag,
                                                anchor_group.children)
            continue

        # At most one extra child remains; turn this into a loop if more are ever allowed.
        if extras and not isinstance(extras[0], HedGroup):
            issues += ErrorHandler.format_error(TemporalErrors.ONSET_TAG_OUTSIDE_OF_GROUP,
                                                extras[0],
                                                def_tag)

        # At this point we have a temporal tag and its def tag.
        issues += self._handle_onset_or_offset(def_tag)

    return issues

Onset Validator

onset_validator

Validates the onset/offset conditions.

OnsetValidator

Validates onset/offset pairs.

Source code in hed/validator/onset_validator.py
class OnsetValidator:
    """ Validates onset/offset pairs. """

    def __init__(self):
        # Open onsets: casefolded def extension -> def extension as written.
        self._onsets = {}

    def validate_temporal_relations(self, hed_string_obj) -> list[dict]:
        """ Validate onset/offset/inset tag relations

        Parameters:
            hed_string_obj (HedString): The HED string to check.

        Returns:
            list[dict]: A list of issues found in validating onsets (i.e., out of order onsets, repeated def names).
        """
        issues = []
        seen_names = set()
        anchored = hed_string_obj.find_top_level_tags(anchor_tags=DefTagNames.TEMPORAL_KEYS)
        for temporal_tag, temporal_group in anchored:
            if not temporal_tag:
                return []

            def_tags = temporal_group.find_def_tags(include_groups=0)
            if not def_tags:
                continue

            def_tag = def_tags[0]
            def_name = def_tag.extension
            if def_name.casefold() in seen_names:
                # The same definition may anchor only one temporal group per string.
                issues += ErrorHandler.format_error(TemporalErrors.ONSET_SAME_DEFS_ONE_ROW, tag=temporal_tag,
                                                    def_name=def_name)
                continue

            seen_names.add(def_tag.extension.casefold())

            # At this point we have a temporal tag and its def tag.
            issues += self._handle_onset_or_offset(def_tag, temporal_tag)

        return issues

    def _handle_onset_or_offset(self, def_tag, onset_offset_tag):
        """ Update the open-onset table for one temporal tag and report ordering errors.

        Parameters:
            def_tag (HedTag): The def tag anchoring the temporal group.
            onset_offset_tag (HedTag): The temporal tag found with it.

        Returns:
            list: An issue when a non-onset tag refers to a def with no open onset; otherwise empty.
        """
        full_name = def_tag.extension
        if onset_offset_tag.short_base_tag == DefTagNames.ONSET_KEY:
            # onset can never fail as it implies an offset
            self._onsets[full_name.casefold()] = full_name
            return []

        is_offset = onset_offset_tag.short_base_tag == DefTagNames.OFFSET_KEY
        if full_name.casefold() in self._onsets:
            if is_offset:
                # The offset closes the matching onset.
                del self._onsets[full_name.casefold()]
            return []

        if is_offset:
            return ErrorHandler.format_error(TemporalErrors.OFFSET_BEFORE_ONSET, tag=def_tag)
        return ErrorHandler.format_error(TemporalErrors.INSET_BEFORE_ONSET, tag=def_tag)

    @staticmethod
    def check_for_banned_tags(hed_string) -> list[dict]:
        """ Returns an issue for every tag found from the banned list (for files without onset column).

        Parameters:
            hed_string (HedString): The string to check.

        Returns:
            list[dict]: The validation issues for the banned tags found. Each issue is a dictionary.
        """
        timeline_tags = DefTagNames.TIMELINE_KEYS
        found = []
        for candidate in hed_string.get_all_tags():
            if candidate.short_base_tag not in timeline_tags:
                continue
            found += ErrorHandler.format_error(TemporalErrors.TEMPORAL_TAG_NO_TIME, candidate)
        return found

check_for_banned_tags staticmethod

check_for_banned_tags(hed_string) -> list[dict]

Returns an issue for every tag found from the banned list (for files without onset column).

Parameters:

Name Type Description Default
hed_string HedString

The string to check.

required

Returns:

Type Description
list[dict]

list[dict]: The validation issues associated with the characters. Each issue is a dictionary.

Source code in hed/validator/onset_validator.py
@staticmethod
def check_for_banned_tags(hed_string) -> list[dict]:
    """ Returns an issue for every tag found from the banned list (for files without onset column).

    Parameters:
        hed_string (HedString): The string to check.

    Returns:
        list[dict]: The validation issues for the banned tags found. Each issue is a dictionary.
    """
    timeline_tags = DefTagNames.TIMELINE_KEYS
    found = []
    for candidate in hed_string.get_all_tags():
        if candidate.short_base_tag not in timeline_tags:
            continue
        found += ErrorHandler.format_error(TemporalErrors.TEMPORAL_TAG_NO_TIME, candidate)
    return found

validate_temporal_relations

validate_temporal_relations(hed_string_obj) -> list[dict]

Validate onset/offset/inset tag relations

Parameters:

Name Type Description Default
hed_string_obj HedString

The HED string to check.

required

Returns:

Type Description
list[dict]

list[dict]: A list of issues found in validating onsets (i.e., out of order onsets, repeated def names).

Source code in hed/validator/onset_validator.py
def validate_temporal_relations(self, hed_string_obj) -> list[dict]:
    """ Validate onset/offset/inset tag relations

    Parameters:
        hed_string_obj (HedString): The HED string to check.

    Returns:
        list[dict]: A list of issues found in validating onsets (i.e., out of order onsets, repeated def names).
    """
    issues = []
    seen_names = set()
    anchored = hed_string_obj.find_top_level_tags(anchor_tags=DefTagNames.TEMPORAL_KEYS)
    for temporal_tag, temporal_group in anchored:
        if not temporal_tag:
            return []

        def_tags = temporal_group.find_def_tags(include_groups=0)
        if not def_tags:
            continue

        def_tag = def_tags[0]
        def_name = def_tag.extension
        if def_name.casefold() in seen_names:
            # The same definition may anchor only one temporal group per string.
            issues += ErrorHandler.format_error(TemporalErrors.ONSET_SAME_DEFS_ONE_ROW, tag=temporal_tag,
                                                def_name=def_name)
            continue

        seen_names.add(def_tag.extension.casefold())

        # At this point we have a temporal tag and its def tag.
        issues += self._handle_onset_or_offset(def_tag, temporal_tag)

    return issues

Sidecar Validator

SidecarValidator

Source code in hed/validator/sidecar_validator.py
class SidecarValidator:
    """ Validates the structure, column references, and HED strings of a sidecar. """
    reserved_column_names = ["HED"]
    reserved_category_values = ["n/a"]
    # Curly-brace column reference such as {column_name}; compiled once and shared by every reference scan.
    _column_ref_pattern = re.compile(r"\{([a-z_\-0-9]+)\}", re.IGNORECASE)

    def __init__(self, hed_schema):
        """
        Constructor for the SidecarValidator class.

        Parameters:
            hed_schema (HedSchema): HED schema object to use for validation.
        """
        self._schema = hed_schema

    def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None) -> list[dict]:
        """Validate the input data using the schema

        Parameters:
            sidecar (Sidecar): Input data to be validated.
            extra_def_dicts (list or DefinitionDict): extra def dicts in addition to sidecar
            name (str): The name to report this sidecar as
            error_handler (ErrorHandler): Error context to use.  Creates a new one if None
        Returns:
            list[dict]: A list of issues associated with each level in the HED string.
        """
        from hed.validator import HedValidator
        issues = []
        if error_handler is None:
            error_handler = ErrorHandler()

        error_handler.push_error_context(ErrorContext.FILE_NAME, name)
        issues += self.validate_structure(sidecar, error_handler=error_handler)
        issues += self._validate_refs(sidecar, error_handler)

        # only allowed early out, something is very wrong with structure or refs
        if check_for_any_errors(issues):
            error_handler.pop_error_context()
            return issues
        sidecar_def_dict = sidecar.get_def_dict(hed_schema=self._schema, extra_def_dicts=extra_def_dicts)
        hed_validator = HedValidator(self._schema, def_dicts=sidecar_def_dict,  definitions_allowed=True)

        issues += sidecar._extract_definition_issues
        issues += sidecar_def_dict.issues

        all_ref_columns = sidecar.get_column_refs()
        definition_checks = {}
        # Column name -> HED strings lookup used to expand references. It does not depend on the
        # string being validated, so it is built on first use and then reused.
        refs_strings = None
        for column_data in sidecar:
            column_name = column_data.column_name
            column_data = column_data._get_unvalidated_data()
            hed_strings = column_data.get_hed_strings()
            is_ref_column = column_name in all_ref_columns
            error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name)
            for key_name, hed_string in hed_strings.items():
                new_issues = []
                # The key context is only pushed when the column has more than one HED string.
                if len(hed_strings) > 1:
                    error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name)
                hed_string_obj = HedString(hed_string, hed_schema=self._schema, def_dict=sidecar_def_dict)
                hed_string_obj.remove_refs()

                error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj)
                new_issues += hed_validator.run_basic_checks(hed_string_obj, allow_placeholders=True)
                def_check_list = definition_checks.setdefault(column_name, [])
                def_check_list.append(hed_string_obj.find_tags({DefTagNames.DEFINITION_KEY}, recursive=True,
                                                               include_groups=0))

                # Might refine this later - for now just skip checking placeholder counts in definition columns.
                if not def_check_list[-1]:
                    new_issues += self._validate_pound_sign_count(hed_string_obj, column_type=column_data.column_type)

                error_handler.add_context_and_filter(new_issues)
                issues += new_issues
                error_handler.pop_error_context()  # Hed String

                # Only do full string checks on full columns, not partial ref columns.
                if not is_ref_column:
                    if refs_strings is None:
                        refs_strings = self._get_ref_strings(sidecar)
                    issues += self._validate_ref_combinations(hed_string, refs_strings, hed_validator,
                                                              sidecar_def_dict, error_handler)
                if len(hed_strings) > 1:
                    error_handler.pop_error_context()  # Category key

            error_handler.pop_error_context()  # Column Name
        issues += self._check_definitions_bad_spot(definition_checks, error_handler)
        issues = sort_issues(issues)

        error_handler.pop_error_context()  # Filename

        return issues

    @staticmethod
    def _get_ref_strings(sidecar):
        """ Build the lookup from column name to that column's HED strings.

        Parameters:
            sidecar (Sidecar): The sidecar whose columns are collected.

        Returns:
            dict: Column name -> result of get_hed_strings() for that column, plus a "HED"
                  entry of ["n/a"] when the sidecar has no column of that name.
        """
        refs_strings = {data.column_name: data.get_hed_strings() for data in sidecar}
        if "HED" not in refs_strings:
            refs_strings["HED"] = ["n/a"]
        return refs_strings

    def _validate_ref_combinations(self, hed_string, refs_strings, hed_validator, def_dict, error_handler):
        """ Run the full-string checks on every expansion of the column references in one HED string.

        Parameters:
            hed_string (str): The raw HED string, possibly containing {column} references.
            refs_strings (dict): Column name -> replacement candidates, as built by _get_ref_strings.
            hed_validator (HedValidator): Validator used for the full-string checks.
            def_dict (DefinitionDict): Definitions used when creating the expanded HED strings.
            error_handler (ErrorHandler): Sets the context for the error reporting. Cannot be None.

        Returns:
            list[dict]: Issues found across all expansions. A string with no references is checked once.
        """
        issues = []
        refs = self._column_ref_pattern.findall(hed_string)
        # One combination per choice of replacement for each reference (a single empty
        # combination when there are no references).
        for combination in itertools.product(*[refs_strings[key] for key in refs]):
            new_issues = []
            ref_dict = dict(zip(refs, combination))
            modified_string = hed_string
            for ref in refs:
                modified_string = df_util.replace_ref(modified_string, f"{{{ref}}}", ref_dict[ref])
            hed_string_obj = HedString(modified_string, hed_schema=self._schema, def_dict=def_dict)

            error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj)
            new_issues += hed_validator.run_full_string_checks(hed_string_obj)
            error_handler.add_context_and_filter(new_issues)
            issues += new_issues
            error_handler.pop_error_context()  # Hed string
        return issues

    def validate_structure(self, sidecar, error_handler):
        """ Validate the raw structure of this sidecar.

        Parameters:
            sidecar (Sidecar): the sidecar to validate
            error_handler (ErrorHandler): The error handler to use for error context

        Returns:
            issues (list): A list of issues found with the structure
        """
        all_validation_issues = []
        for column_name, dict_for_entry in sidecar.loaded_dict.items():
            error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name)
            all_validation_issues += self._validate_column_structure(column_name, dict_for_entry, error_handler)
            error_handler.pop_error_context()
        return all_validation_issues

    def _validate_refs(self, sidecar, error_handler):
        """ Check the curly-brace column references used in the sidecar's HED strings.

        Reports malformed braces, references to unknown columns, columns that reference
        themselves, and references to columns that themselves contain references.

        Parameters:
            sidecar (Sidecar): The sidecar to check.
            error_handler (ErrorHandler): Sets the context for the error reporting. Cannot be None.

        Returns:
            list[dict]: Issues found with the column references.
        """
        # Work on a copy so that adding "HED" never alters a list owned by the sidecar.
        possible_column_refs = list(sidecar.all_hed_columns)

        if "HED" not in possible_column_refs:
            possible_column_refs.append("HED")

        issues = []
        found_column_references = {}
        for column_data in sidecar:
            column_name = column_data.column_name
            if column_data.column_type == ColumnType.Ignore:
                continue
            hed_strings = column_data.get_hed_strings()
            error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name)
            matches = []
            for key_name, hed_string in hed_strings.items():
                new_issues = []
                if len(hed_strings) > 1:
                    error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name)

                error_handler.push_error_context(ErrorContext.HED_STRING,
                                                 HedString(hed_string, hed_schema=self._schema))
                invalid_locations = self._find_non_matching_braces(hed_string)
                for loc in invalid_locations:
                    bad_symbol = hed_string[loc]
                    new_issues += error_handler.format_error_with_context(ColumnErrors.MALFORMED_COLUMN_REF,
                                                                          column_name, loc, bad_symbol)

                sub_matches = self._column_ref_pattern.findall(hed_string)
                matches.append(sub_matches)
                for match in sub_matches:
                    if match not in possible_column_refs:
                        new_issues += error_handler.format_error_with_context(ColumnErrors.INVALID_COLUMN_REF, match)

                error_handler.pop_error_context()
                if len(hed_strings) > 1:
                    error_handler.pop_error_context()
                error_handler.add_context_and_filter(new_issues)
                issues += new_issues
            error_handler.pop_error_context()
            references = [match for sublist in matches for match in sublist]
            if references:
                found_column_references[column_name] = references
            if column_name in references:
                issues += error_handler.format_error_with_context(ColumnErrors.SELF_COLUMN_REF, column_name)

        # A referenced column must not contain references of its own.
        for column_name, refs in found_column_references.items():
            for ref in refs:
                if ref in found_column_references and ref != column_name:
                    issues += error_handler.format_error_with_context(ColumnErrors.NESTED_COLUMN_REF, column_name, ref)
        return issues

    @staticmethod
    def _find_non_matching_braces(hed_string):
        """ Return the indexes of curly braces that are unmatched or nested.

        Parameters:
            hed_string (str): The string to scan.

        Returns:
            list: Indexes, in order of detection, of each '{' that is followed by another '{'
                  before being closed or is never closed, and of each '}' with no open '{'.
        """
        issues = []
        open_brace_index = -1  # Index of the currently open '{', or -1 when none is open.

        for i, char in enumerate(hed_string):
            if char == '{':
                if open_brace_index >= 0:  # Nested brace detected
                    issues.append(open_brace_index)
                open_brace_index = i
            elif char == '}':
                if open_brace_index >= 0:
                    open_brace_index = -1
                else:
                    issues.append(i)

        if open_brace_index >= 0:
            issues.append(open_brace_index)

        return issues

    @staticmethod
    def _check_for_key(key, data):
        """ Return True if key is a dictionary key anywhere within nested dicts/lists in data. """
        if isinstance(data, dict):
            return SidecarValidator._check_dict(key, data)
        elif isinstance(data, list):
            return SidecarValidator._check_list(key, data)
        return False

    @staticmethod
    def _check_dict(key, data_dict):
        """ Return True if key is in data_dict or nested anywhere within its values. """
        if key in data_dict:
            return True
        return any(SidecarValidator._check_for_key(key, sub_data) for sub_data in data_dict.values())

    @staticmethod
    def _check_list(key, data_list):
        """ Return True if key is nested anywhere within the items of data_list. """
        return any(SidecarValidator._check_for_key(key, sub_data) for sub_data in data_list)

    def _validate_column_structure(self, column_name, dict_for_entry, error_handler):
        """ Checks primarily for type errors such as expecting a string and getting a list in a json sidecar.

        Parameters:
            column_name (str): The name of the sidecar column being checked.
            dict_for_entry (dict): The raw sidecar entry for this column.
            error_handler (ErrorHandler):  Sets the context for the error reporting. Cannot be None.

        Returns:
            list:  Issues in performing the operations. Each issue is a dictionary.

        """
        val_issues = []
        if column_name in self.reserved_column_names:
            val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED)
            return val_issues

        column_type = ColumnMetadata._detect_column_type(dict_for_entry=dict_for_entry, basic_validation=False)
        if column_type is None:
            val_issues += error_handler.format_error_with_context(SidecarErrors.UNKNOWN_COLUMN_TYPE,
                                                                  column_name=column_name)
        elif column_type == ColumnType.Ignore:
            # An ignored column must not hide a "HED" key anywhere in its nested data.
            found_hed = self._check_for_key("HED", dict_for_entry)
            if found_hed:
                val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED)
        elif column_type == ColumnType.Categorical:
            val_issues += self._validate_categorical_column(column_name, dict_for_entry, error_handler)

        return val_issues

    def _validate_categorical_column(self, column_name, dict_for_entry, error_handler):
        """ Validates a categorical column in a json sidecar.

        Parameters:
            column_name (str): The name of the sidecar column being checked.
            dict_for_entry (dict): The raw sidecar entry for this column; its "HED" value is checked.
            error_handler (ErrorHandler): Sets the context for the error reporting. Cannot be None.

        Returns:
            list: Issues for blank or non-string HED entries and for reserved category keys.
        """
        val_issues = []
        raw_hed_dict = dict_for_entry["HED"]
        if not raw_hed_dict:
            val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING)
        for key_name, hed_string in raw_hed_dict.items():
            error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name)
            if not hed_string:
                val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING)
            elif not isinstance(hed_string, str):
                val_issues += error_handler.format_error_with_context(SidecarErrors.WRONG_HED_DATA_TYPE,
                                                                      given_type=type(hed_string),
                                                                      expected_type="str")
            elif key_name in self.reserved_category_values:
                val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_NA_USED, column_name)
            error_handler.pop_error_context()
        return val_issues

    def _validate_pound_sign_count(self, hed_string, column_type):
        """ Check if a given HED string in the column has the correct number of pound signs.

        Parameters:
            hed_string (str or HedString): HED string to be checked.
            column_type (ColumnType): Type of the column, which determines the expected count.

        Returns:
            list: Issues due to pound sign errors. Each issue is a dictionary.

        Notes:
            Normally the number of # should be either 0 or 1, but sometimes will be higher due to the
            presence of definition tags.

        """
        # Make a copy without definitions to check placeholder count.
        expected_count, error_type = ColumnMetadata.expected_pound_sign_count(column_type)
        hed_string_copy = copy.deepcopy(hed_string)
        hed_string_copy.remove_definitions()
        hed_string_copy.shrink_defs()

        pound_sign_count = str(hed_string_copy).count("#")
        if pound_sign_count != expected_count:
            return ErrorHandler.format_error(error_type, pound_sign_count=pound_sign_count)

        return []

    def _check_definitions_bad_spot(self, definition_checks, error_handler):
        """ Report definitions in columns where only some of the HED strings contain definitions.

        Parameters:
            definition_checks (dict): Column name -> list with, per HED string, the Definition tags found in it.
            error_handler (ErrorHandler): Sets the context for the error reporting. Cannot be None.

        Returns:
            list: One issue per definition tag found in a column that mixes strings with and without definitions.
        """
        issues = []
        for col_name, has_def in definition_checks.items():
            error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, col_name)
            # Exactly one truth value means the column is consistent: all strings have defs or none do.
            def_check = {bool(d) for d in has_def}
            if len(def_check) != 1:
                flat_def_list = [d for defs in has_def for d in defs]
                for d in flat_def_list:
                    issues += error_handler.format_error_with_context(DefinitionErrors.BAD_DEFINITION_LOCATION, d)
            error_handler.pop_error_context()

        return issues

validate

validate(
    sidecar,
    extra_def_dicts=None,
    name=None,
    error_handler=None,
) -> list[dict]

Validate the input data using the schema

Parameters:

Name Type Description Default
sidecar Sidecar

Input data to be validated.

required
extra_def_dicts list or DefinitionDict

extra def dicts in addition to sidecar

None
name str

The name to report this sidecar as

None
error_handler ErrorHandler

Error context to use. Creates a new one if None

None

Returns: list[dict]: A list of issues associated with each level in the HED string.

Source code in hed/validator/sidecar_validator.py
def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None) -> list[dict]:
    """Validate the input data using the schema

    Structure and column references are checked first; if either produces errors, validation
    stops there. Otherwise every HED string gets the basic checks, and strings in columns that
    are not themselves referenced get the full-string checks for each expansion of their references.

    Parameters:
        sidecar (Sidecar): Input data to be validated.
        extra_def_dicts (list or DefinitionDict): extra def dicts in addition to sidecar
        name (str): The name to report this sidecar as
        error_handler (ErrorHandler): Error context to use.  Creates a new one if None
    Returns:
        list[dict]: A list of issues associated with each level in the HED string.
    """
    from hed.validator import HedValidator
    issues = []
    if error_handler is None:
        error_handler = ErrorHandler()

    error_handler.push_error_context(ErrorContext.FILE_NAME, name)
    issues += self.validate_structure(sidecar, error_handler=error_handler)
    issues += self._validate_refs(sidecar, error_handler)

    # only allowed early out, something is very wrong with structure or refs
    if check_for_any_errors(issues):
        error_handler.pop_error_context()
        return issues
    sidecar_def_dict = sidecar.get_def_dict(hed_schema=self._schema, extra_def_dicts=extra_def_dicts)
    hed_validator = HedValidator(self._schema, def_dicts=sidecar_def_dict,  definitions_allowed=True)

    issues += sidecar._extract_definition_issues
    issues += sidecar_def_dict.issues

    # todo: Break this function up
    all_ref_columns = sidecar.get_column_refs()
    definition_checks = {}
    # Column name -> HED strings lookup used to expand references. It does not depend on the
    # string being validated, so it is built on first use and then reused.
    refs_strings = None
    for column_data in sidecar:
        column_name = column_data.column_name
        column_data = column_data._get_unvalidated_data()
        hed_strings = column_data.get_hed_strings()
        is_ref_column = column_name in all_ref_columns
        error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name)
        for key_name, hed_string in hed_strings.items():
            new_issues = []
            # The key context is only pushed when the column has more than one HED string.
            if len(hed_strings) > 1:
                error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name)
            hed_string_obj = HedString(hed_string, hed_schema=self._schema, def_dict=sidecar_def_dict)
            hed_string_obj.remove_refs()

            error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj)
            new_issues += hed_validator.run_basic_checks(hed_string_obj, allow_placeholders=True)
            def_check_list = definition_checks.setdefault(column_name, [])
            def_check_list.append(hed_string_obj.find_tags({DefTagNames.DEFINITION_KEY}, recursive=True,
                                                           include_groups=0))

            # Might refine this later - for now just skip checking placeholder counts in definition columns.
            if not def_check_list[-1]:
                new_issues += self._validate_pound_sign_count(hed_string_obj, column_type=column_data.column_type)

            error_handler.add_context_and_filter(new_issues)
            issues += new_issues
            error_handler.pop_error_context()  # Hed String

            # Only do full string checks on full columns, not partial ref columns.
            if not is_ref_column:
                refs = re.findall(r"\{([a-z_\-0-9]+)\}", hed_string, re.IGNORECASE)
                if refs_strings is None:
                    refs_strings = {data.column_name: data.get_hed_strings() for data in sidecar}
                    if "HED" not in refs_strings:
                        refs_strings["HED"] = ["n/a"]
                # One combination per choice of replacement for each reference (a single empty
                # combination when the string has no references).
                for combination in itertools.product(*[refs_strings[key] for key in refs]):
                    new_issues = []
                    ref_dict = dict(zip(refs, combination))
                    modified_string = hed_string
                    for ref in refs:
                        modified_string = df_util.replace_ref(modified_string, f"{{{ref}}}", ref_dict[ref])
                    hed_string_obj = HedString(modified_string, hed_schema=self._schema, def_dict=sidecar_def_dict)

                    error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj)
                    new_issues += hed_validator.run_full_string_checks(hed_string_obj)
                    error_handler.add_context_and_filter(new_issues)
                    issues += new_issues
                    error_handler.pop_error_context()  # Hed string
            if len(hed_strings) > 1:
                error_handler.pop_error_context()  # Category key

        error_handler.pop_error_context()  # Column Name
    issues += self._check_definitions_bad_spot(definition_checks, error_handler)
    issues = sort_issues(issues)

    error_handler.pop_error_context()  # Filename

    return issues

validate_structure

validate_structure(sidecar, error_handler)

Validate the raw structure of this sidecar.

Parameters:

Name Type Description Default
sidecar Sidecar

the sidecar to validate

required
error_handler ErrorHandler

The error handler to use for error context

required

Returns:

Name Type Description
issues list

A list of issues found with the structure

Source code in hed/validator/sidecar_validator.py
def validate_structure(self, sidecar, error_handler):
    """ Check the raw (unparsed) structure of every column entry in a sidecar.

    Each entry of ``sidecar.loaded_dict`` is validated in turn, with the column
    name pushed onto the error context while that entry is being checked.

    Parameters:
        sidecar (Sidecar): the sidecar to validate
        error_handler (ErrorHandler): The error handler to use for error context

    Returns:
        issues (list): A list of issues found with the structure
    """
    structure_issues = []
    for name, entry in sidecar.loaded_dict.items():
        # Issues produced for this entry are reported under its column name.
        error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, name)
        structure_issues.extend(self._validate_column_structure(name, entry, error_handler))
        error_handler.pop_error_context()
    return structure_issues

Spreadsheet Validator

spreadsheet_validator

Validates spreadsheet tabular data.

SpreadsheetValidator

Source code in hed/validator/spreadsheet_validator.py
class SpreadsheetValidator:
    """ Validates the HED strings contained in tabular (spreadsheet-like) input data.

    A validator is created for one schema and may be reused; each call to validate()
    resets the per-call state (the HED validator, the onset validator and the set of
    rows already known to be invalid).
    """
    # Largest absolute difference between two onsets that are still treated as the same time.
    # (Was written as ``10-7``, which evaluates to 3 rather than the intended 10**-7.)
    ONSET_TOLERANCE = 1e-7
    # Matches the temporal tag names; callers casefold the text before searching.
    TEMPORAL_ANCHORS = re.compile(r"|".join(map(re.escape, ["onset", "inset", "offset", "delay"])))

    def __init__(self, hed_schema):
        """
        Constructor for the SpreadsheetValidator class.

        Parameters:
            hed_schema (HedSchema): HED schema object to use for validation.
        """
        self._schema = hed_schema
        self._hed_validator = None
        self._onset_validator = None
        # Original row indexes that already produced errors; later passes skip them.
        self.invalid_original_rows = set()

    def validate(self, data, def_dicts=None, name=None, error_handler=None) -> list[dict]:
        """
        Validate the input data using the schema

        Parameters:
            data (BaseInput): Input data to be validated.
            def_dicts (list of DefDict or DefDict): all definitions to use for validation
            name (str): The name to report errors from this file as
            error_handler (ErrorHandler): Error context to use.  Creates a new one if None
        Returns:
            list[dict]: A list of issues for HED string

        Raises:
            TypeError: If data is not a BaseInput.
        """

        if error_handler is None:
            error_handler = ErrorHandler()

        if not isinstance(data, BaseInput):
            raise TypeError("Invalid type passed to spreadsheet validator.  Can only validate BaseInput objects.")

        self.invalid_original_rows = set()

        error_handler.push_error_context(ErrorContext.FILE_NAME, name)
        # Adjust to account for 1 based
        row_adj = 1
        # Adjust to account for column names
        if data.has_column_names:
            row_adj += 1

        issues = self._validate_column_structure(data, error_handler)

        if data.needs_sorting:
            # Work on a sorted copy so the caller's object is left untouched.
            data_new = copy.deepcopy(data)
            data_new._dataframe = df_util.sort_dataframe_by_onsets(data.dataframe)
            issues += error_handler.format_error_with_context(ValidationErrors.ONSETS_UNORDERED)
            data = data_new

        # If there are n/a errors in the onset column, further validation cannot proceed
        onsets = data.onsets
        if onsets is not None:
            onsets = onsets.astype(str).str.strip()
            onsets = pd.to_numeric(onsets, errors='coerce')
            assembled = data.series_a
            na_issues = self._check_onset_nans(onsets, assembled, self._schema, error_handler, row_adj)
            issues += na_issues
            if na_issues:
                # Balance the FILE_NAME push above so a caller-supplied handler is not left
                # with a stale context after this early exit.
                error_handler.pop_error_context()
                return issues
            onsets = df_util.split_delay_tags(assembled, self._schema, onsets)

        df = data.dataframe_a

        self._hed_validator = HedValidator(self._schema, def_dicts=def_dicts)
        if onsets is not None:
            self._onset_validator = OnsetValidator()
            # True for rows that have a usable numeric onset.
            onset_mask = ~pd.isna(pd.to_numeric(onsets['onset'], errors='coerce'))
        else:
            self._onset_validator = None
            onset_mask = None

        # Check the rows of the input data
        issues += self._run_checks(df, error_handler=error_handler, row_adj=row_adj, onset_mask=onset_mask)
        if self._onset_validator:
            issues += self._run_onset_checks(onsets, error_handler=error_handler, row_adj=row_adj)
            issues += self._recheck_duplicates(onsets, error_handler=error_handler, row_adj=row_adj)
        error_handler.pop_error_context()

        issues = sort_issues(issues)
        return issues

    def _run_checks(self, hed_df, error_handler, row_adj, onset_mask=None):
        """
        Run the basic checks on every cell and, where applicable, the full checks on each combined row.

        Rows in which any cell produced an error are recorded in self.invalid_original_rows
        and are not checked further.

        Parameters:
            hed_df (pd.DataFrame): The dataframe whose cells hold the HED strings to check.
            error_handler (ErrorHandler): Holds context
            row_adj (int): Offset added to a row index when reporting the row number.
            onset_mask (pd.Series or None): If given, rows whose entry is truthy skip the
                combined-row checks in this method.
        Returns:
            list: The issues found.
        """
        issues = []
        columns = list(hed_df.columns)
        self.invalid_original_rows = set()
        for row_number, text_file_row in hed_df.iterrows():
            error_handler.push_error_context(ErrorContext.ROW, row_number + row_adj)
            row_strings = []
            # Issues from every cell in this row, so an error in any column (not only the
            # last one processed) marks the row as invalid.
            row_cell_issues = []
            for column_number, cell in enumerate(text_file_row):
                if not cell or cell == "n/a":
                    continue

                error_handler.push_error_context(ErrorContext.COLUMN, columns[column_number])

                column_hed_string = HedString(cell, self._schema)
                row_strings.append(column_hed_string)
                error_handler.push_error_context(ErrorContext.HED_STRING, column_hed_string)
                new_column_issues = self._hed_validator.run_basic_checks(column_hed_string, allow_placeholders=False)

                error_handler.add_context_and_filter(new_column_issues)
                error_handler.pop_error_context()  # HedString
                error_handler.pop_error_context()  # column

                issues += new_column_issues
                row_cell_issues += new_column_issues
            # We want to do full onset checks on the combined and filtered rows
            if check_for_any_errors(row_cell_issues):
                self.invalid_original_rows.add(row_number)
                error_handler.pop_error_context()  # Row
                continue

            if not row_strings or (onset_mask is not None and onset_mask.iloc[row_number]):
                error_handler.pop_error_context()  # Row
                continue

            # Continue on if not a timeline file
            row_string = HedString.from_hed_strings(row_strings)

            if row_string:
                error_handler.push_error_context(ErrorContext.HED_STRING, row_string)
                new_column_issues = self._hed_validator.run_full_string_checks(row_string)
                new_column_issues += OnsetValidator.check_for_banned_tags(row_string)
                error_handler.add_context_and_filter(new_column_issues)
                error_handler.pop_error_context()  # HedString
                issues += new_column_issues
            error_handler.pop_error_context()  # Row
        return issues

    def _run_onset_checks(self, onset_filtered, error_handler, row_adj):
        """
        Run the full string checks and temporal-relation checks on the onset-filtered rows.

        Parameters:
            onset_filtered (pd.DataFrame): Rows providing "HED" and "original_index" columns.
            error_handler (ErrorHandler): Holds context
            row_adj (int): Offset added to the original index when reporting the row number.
        Returns:
            list: The issues found.
        """
        issues = []
        for row in onset_filtered[["HED", "original_index"]].itertuples(index=True):
            # Skip rows that had issues.
            if row.original_index in self.invalid_original_rows:
                continue
            error_handler.push_error_context(ErrorContext.ROW, row.original_index + row_adj)
            row_string = HedString(row.HED, self._schema, self._hed_validator._def_validator)

            if row_string:
                error_handler.push_error_context(ErrorContext.HED_STRING, row_string)
                new_column_issues = self._hed_validator.run_full_string_checks(row_string)
                new_column_issues += self._onset_validator.validate_temporal_relations(row_string)
                error_handler.add_context_and_filter(new_column_issues)
                error_handler.pop_error_context()  # HedString
                issues += new_column_issues
            error_handler.pop_error_context()  # Row
        return issues

    def _recheck_duplicates(self, onset_filtered, error_handler, row_adj):
        """
        Re-run the full string checks on rows whose following row has an onset within ONSET_TOLERANCE.

        Parameters:
            onset_filtered (pd.DataFrame): Rows providing "HED", "onset" and "original_index" columns.
            error_handler (ErrorHandler): Holds context
            row_adj (int): Offset added to the original index when reporting the row number.
        Returns:
            list: The issues found.
        """
        issues = []
        for i in range(len(onset_filtered) - 1):
            current_row = onset_filtered.iloc[i]
            next_row = onset_filtered.iloc[i + 1]

            # Skip if the HED column is empty or there was already an error
            if not current_row["HED"] or \
                (current_row["original_index"] in self.invalid_original_rows) or \
                    (not self._is_within_tolerance(next_row["onset"], current_row["onset"])):
                continue

            # At least two rows have been merged with their onsets recognized as the same.
            error_handler.push_error_context(ErrorContext.ROW, current_row.original_index + row_adj)
            row_string = HedString(current_row.HED, self._schema, self._hed_validator._def_validator)
            error_handler.push_error_context(ErrorContext.HED_STRING, row_string)
            new_column_issues = self._hed_validator.run_full_string_checks(row_string)
            error_handler.add_context_and_filter(new_column_issues)
            error_handler.pop_error_context()  # HedString
            issues += new_column_issues
            error_handler.pop_error_context()  # Row

        return issues

    def _is_within_tolerance(self, onset1, onset2):
        """
        Checks if two onset strings are within the specified tolerance.

        Parameters:
            onset1 (str): The first onset value as a string.
            onset2 (str): The second onset value as a string.

        Returns:
            bool: True if the values are within tolerance and valid, False otherwise.
        """
        try:
            # Convert to floats
            onset1 = float(onset1)
            onset2 = float(onset2)
        except (TypeError, ValueError):
            # Return False if either value is not convertible to a float
            # (float(None) raises TypeError, float("abc") raises ValueError).
            return False

        # Check if both values are finite
        if not (math.isfinite(onset1) and math.isfinite(onset2)):
            return False

        # Check if the difference is within tolerance
        return abs(onset1 - onset2) <= self.ONSET_TOLERANCE

    def _validate_column_structure(self, base_input, error_handler):
        """
        Validate that each column in the input data has valid values.

        Parameters:
            base_input (BaseInput): The input data to be validated.
            error_handler (ErrorHandler): Holds context
        Returns:
            List of issues associated with each invalid value. Each issue is a dictionary.
        """
        issues = []
        col_issues = base_input._mapper.check_for_mapping_issues()
        error_handler.add_context_and_filter(col_issues)
        issues += col_issues
        for column in base_input.column_metadata().values():
            if column.column_type == ColumnType.Categorical:
                valid_keys = set(column.hed_dict.keys())
                column_values = base_input.dataframe[column.column_name]

                # Find non n/a values that are not in the valid keys
                invalid_values = set(column_values[(column_values != "n/a") & (~column_values.isin(valid_keys))])

                # If there are invalid values, log a single error
                if invalid_values:
                    error_handler.push_error_context(ErrorContext.COLUMN, column.column_name)
                    issues += error_handler.format_error_with_context(
                        ValidationErrors.SIDECAR_KEY_MISSING,
                        invalid_keys=str(list(invalid_values)),
                        category_keys=list(valid_keys),
                        column_name=column.column_name)
                    error_handler.pop_error_context()

        column_refs = set(base_input.get_column_refs())  # Convert to set for O(1) lookup
        columns = set(base_input.columns)  # Convert to set for efficient comparison

        # Find missing column references
        missing_refs = column_refs - columns  # Set difference: elements in column_refs but not in columns

        # If there are missing references, log a single error
        if missing_refs:
            issues += error_handler.format_error_with_context(
                ValidationErrors.TSV_COLUMN_MISSING,
                invalid_keys=list(missing_refs)  # Include all missing column references
            )

        return issues

    def _check_onset_nans(self, onsets, assembled, hed_schema, error_handler, row_adj):
        """
        Report temporal tags that appear in rows whose onset is missing (NaN).

        Parameters:
            onsets (pd.Series): The onset values; NaN marks a row without a usable onset.
            assembled (pd.Series): The HED string for each row, indexed like onsets.
            hed_schema (HedSchema): Schema used to parse the HED strings.
            error_handler (ErrorHandler): Holds context
            row_adj (int): Offset added to a row index when reporting the row number.
        Returns:
            list: One issue per top-level timeline tag found in a row with a NaN onset.
        """
        onset_mask = pd.isna(onsets)
        if not onset_mask.any():
            return []
        filtered = assembled[onset_mask]
        issues = []
        for index, value in filtered.items():
            # Cheap text pre-filter: only parse strings that mention a temporal tag name.
            if not bool(self.TEMPORAL_ANCHORS.search(value.casefold())):
                continue
            hed_obj = HedString(value, hed_schema)
            error_handler.push_error_context(ErrorContext.ROW, index + row_adj)
            error_handler.push_error_context(ErrorContext.HED_STRING, hed_obj)
            for tag in hed_obj.find_top_level_tags(anchor_tags=DefTagNames.TIMELINE_KEYS, include_groups=0):
                issues += error_handler.format_error_with_context(TemporalErrors.TEMPORAL_TAG_NO_TIME, tag=tag)
            error_handler.pop_error_context()  # HedString
            error_handler.pop_error_context()  # Row
        return issues

validate

validate(
    data, def_dicts=None, name=None, error_handler=None
) -> list[dict]

Validate the input data using the schema

Parameters:

Name Type Description Default
data BaseInput

Input data to be validated.

required
def_dicts list of DefDict or DefDict

all definitions to use for validation

None
name str

The name to report errors from this file as

None
error_handler ErrorHandler

Error context to use. Creates a new one if None

None

Returns: list[dict]: A list of the validation issues found in the input data.

Source code in hed/validator/spreadsheet_validator.py
def validate(self, data, def_dicts=None, name=None, error_handler=None) -> list[dict]:
    """
    Validate the input data using the schema

    Parameters:
        data (BaseInput): Input data to be validated.
        def_dicts (list of DefDict or DefDict): all definitions to use for validation
        name (str): The name to report errors from this file as
        error_handler (ErrorHandler): Error context to use.  Creates a new one if None
    Returns:
        list[dict]: A list of issues for HED string

    Raises:
        TypeError: If data is not a BaseInput.
    """

    if error_handler is None:
        error_handler = ErrorHandler()

    if not isinstance(data, BaseInput):
        raise TypeError("Invalid type passed to spreadsheet validator.  Can only validate BaseInput objects.")

    self.invalid_original_rows = set()

    error_handler.push_error_context(ErrorContext.FILE_NAME, name)
    # Adjust to account for 1 based
    row_adj = 1
    # Adjust to account for column names
    if data.has_column_names:
        row_adj += 1

    issues = self._validate_column_structure(data, error_handler)

    if data.needs_sorting:
        # Work on a sorted copy so the caller's object is left untouched.
        data_new = copy.deepcopy(data)
        data_new._dataframe = df_util.sort_dataframe_by_onsets(data.dataframe)
        issues += error_handler.format_error_with_context(ValidationErrors.ONSETS_UNORDERED)
        data = data_new

    # If there are n/a errors in the onset column, further validation cannot proceed
    onsets = data.onsets
    if onsets is not None:
        onsets = onsets.astype(str).str.strip()
        onsets = pd.to_numeric(onsets, errors='coerce')
        assembled = data.series_a
        na_issues = self._check_onset_nans(onsets, assembled, self._schema, error_handler, row_adj)
        issues += na_issues
        if na_issues:
            # Balance the FILE_NAME push above so a caller-supplied handler is not left
            # with a stale context after this early exit.
            error_handler.pop_error_context()
            return issues
        onsets = df_util.split_delay_tags(assembled, self._schema, onsets)

    df = data.dataframe_a

    self._hed_validator = HedValidator(self._schema, def_dicts=def_dicts)
    if onsets is not None:
        self._onset_validator = OnsetValidator()
        # True for rows that have a usable numeric onset.
        onset_mask = ~pd.isna(pd.to_numeric(onsets['onset'], errors='coerce'))
    else:
        self._onset_validator = None
        onset_mask = None

    # Check the rows of the input data
    issues += self._run_checks(df, error_handler=error_handler, row_adj=row_adj, onset_mask=onset_mask)
    if self._onset_validator:
        issues += self._run_onset_checks(onsets, error_handler=error_handler, row_adj=row_adj)
        issues += self._recheck_duplicates(onsets, error_handler=error_handler, row_adj=row_adj)
    error_handler.pop_error_context()

    issues = sort_issues(issues)
    return issues

Validation Utilities

util

Validation of HED tags.