Source code for hed.validator.util.class_util

"""Utilities to support HED validation."""

import datetime
import re

from hed.errors.error_reporter import ErrorHandler
from hed.errors.error_types import ValidationErrors
from hed.validator.util.char_util import CharRexValidator


[docs] class UnitValueValidator: """Validates units.""" DATE_TIME_VALUE_CLASS = "dateTimeClass" NUMERIC_VALUE_CLASS = "numericClass" TEXT_VALUE_CLASS = "textClass" NAME_VALUE_CLASS = "nameClass" DIGIT_OR_POUND_EXPRESSION = r"^(-?[\d.]+(?:e-?\d+)?|#)$" def __init__(self, modern_allowed_char_rules=False, value_validators=None): """Validates the unit and value classes on a given tag. Parameters: value_validators(dict or None): Override or add value class validators """ self._validate_characters = modern_allowed_char_rules self._value_validators = self._get_default_value_class_validators() self._char_validator = CharRexValidator() if value_validators and isinstance(value_validators, dict): self._value_validators.update(value_validators) def _get_default_value_class_validators(self): """Return a dictionary of value class validator functions. Returns: dict: Dictionary of value class validator functions. """ validator_dict = { self.DATE_TIME_VALUE_CLASS: is_date_time_value_class, self.NUMERIC_VALUE_CLASS: is_numeric_value_class, self.TEXT_VALUE_CLASS: is_text_value_class, self.NAME_VALUE_CLASS: is_name_value_class, } return validator_dict
[docs] def check_tag_unit_class_units_are_valid( self, original_tag, validate_text, report_as=None, error_code=None, allow_placeholders=True ) -> list[dict]: """Report incorrect unit class or units. Parameters: original_tag (HedTag): The original tag that is used to report the error. validate_text (str): The text to validate. report_as (HedTag): Report errors as coming from this tag, rather than original_tag. error_code (str): Override error codes. allow_placeholders (bool): Whether placeholders are allowed (affects value class validation for "#") Returns: list: Validation issues. Each issue is a dictionary. """ if not original_tag.is_unit_class_tag(): return [] validation_issues = [] # Check the units first stripped_value, units = original_tag.get_stripped_unit_value(validate_text) if not stripped_value: # stripped_value is None only when invalid units are present validation_issues += self._report_bad_units(original_tag, report_as) return validation_issues # If value is a placeholder (#) and placeholders are allowed, it's valid # Invalid units would have been caught above (stripped_value would be None) if stripped_value == "#" and allow_placeholders: return validation_issues # Check the value classes # If placeholders are NOT allowed, "#" will fail value class validation (e.g., not a valid number) validation_issues += self._check_value_class(original_tag, stripped_value, report_as) # Override error code if specified (for def/def-expand tags) if error_code and validation_issues and not any(error_code == issue["code"] for issue in validation_issues): new_issue = validation_issues[0].copy() new_issue["code"] = error_code validation_issues += [new_issue] return validation_issues
[docs] def check_tag_value_class_valid(self, original_tag, validate_text, report_as=None) -> list[dict]: """Report an invalid value portion. Parameters: original_tag (HedTag): The original tag that is used to report the error. validate_text (str): The text to validate. report_as (HedTag): Report errors as coming from this tag, rather than original_tag. Returns: list: Validation issues. """ return self._check_value_class(original_tag, validate_text, report_as)
def _get_problem_indices(self, stripped_value, class_name, start_index=0): indices = self._char_validator.get_problem_chars(stripped_value, class_name) if indices: indices = [(char, index + start_index) for index, char in indices] return indices def _check_value_class(self, original_tag, stripped_value, report_as): """Return any issues found if this is a value tag, Parameters: original_tag (HedTag): The original tag that is used to report the error. stripped_value (str): value without units report_as (HedTag): Report as this tag. Returns: list: List of dictionaries of validation issues. """ if not original_tag.is_takes_value_tag(): return [] classes = list(original_tag.value_classes.keys()) if not classes: return [] start_index = original_tag.extension.find(stripped_value) + len(original_tag.org_base_tag) + 1 report_as = report_as if report_as else original_tag class_valid = {} for class_name in classes: class_valid[class_name] = self._char_validator.is_valid_value(stripped_value, class_name) char_errors = {} for class_name in classes: char_errors[class_name] = self._get_problem_indices(stripped_value, class_name, start_index=start_index) if class_valid[class_name] and not char_errors[class_name]: # We have found a valid class return [] validation_issues = self.report_value_errors(char_errors, class_valid, report_as) return validation_issues
[docs] @staticmethod def report_value_errors(error_dict, class_valid, report_as): """Build validation issues from per-class character error and validity dicts. Parameters: error_dict (dict): Mapping of class name to list of (char, index) problem tuples. class_valid (dict): Mapping of class name to a validity result (``True``, ``re.Match``, or ``False``) indicating whether the full value passed word-level format validation for that class. report_as (HedTag): The tag object used as context in error reporting. Returns: list[dict]: Validation issue dictionaries. """ validation_issues = [] for class_name, errors in error_dict.items(): if not errors and class_valid[class_name]: continue elif not class_valid[class_name]: validation_issues += ErrorHandler.format_error( ValidationErrors.INVALID_VALUE_CLASS_VALUE, index_in_tag=0, index_in_tag_end=len(report_as.org_tag), value_class=class_name, tag=report_as, ) elif errors: validation_issues.extend(UnitValueValidator.report_value_char_errors(class_name, errors, report_as)) return validation_issues
[docs] @staticmethod def report_value_char_errors(class_name, errors, report_as): """Build validation issues for specific invalid characters within a value class string. Parameters: class_name (str): The value class name that detected the errors. errors (list[tuple[str, int]]): Character/index pairs of invalid characters. report_as (HedTag): The tag object used as context in error reporting. Returns: list[dict]: Validation issue dictionaries. """ validation_issues = [] for value in errors: if value[0] in "{}": validation_issues += ErrorHandler.format_error( ValidationErrors.CURLY_BRACE_UNSUPPORTED_HERE, tag=report_as, problem_tag=value[0] ) else: validation_issues += ErrorHandler.format_error( ValidationErrors.INVALID_VALUE_CLASS_CHARACTER, tag=report_as, value_class=class_name, problem_tag=value[0], ) return validation_issues
@staticmethod def _report_bad_units(original_tag, report_as): """Returns an issue noting this is bad units Parameters: original_tag (HedTag): The original tag that is used to report the error. report_as (HedTag): Report as this tag. Returns: list: List of dictionaries of validation issues. """ report_as = report_as if report_as else original_tag tag_unit_class_units = original_tag.get_tag_unit_class_units() return ErrorHandler.format_error(ValidationErrors.UNITS_INVALID, tag=report_as, units=tag_unit_class_units) def _validate_value_class_portion(self, original_tag, portion_to_validate): if portion_to_validate is None: return False value_class_types = original_tag.value_classes return self.validate_value_class_type(portion_to_validate, value_class_types)
[docs] def validate_value_class_type(self, unit_or_value_portion, valid_types) -> bool: """Report invalid unit or valid class values. Parameters: unit_or_value_portion (str): The value portion to validate. valid_types (list): The names of value class or unit class types (e.g. dateTime or dateTimeClass). Returns: bool: True if this is one of the valid_types validators. """ has_valid_func = False for unit_class_type in valid_types: valid_func = self._value_validators.get(unit_class_type) if valid_func: has_valid_func = True if valid_func(unit_or_value_portion): return True return not has_valid_func
def find_invalid_positions(s, pattern): """Return a list of (index, char) pairs for characters in s that do not match pattern. Parameters: s (str): The string to scan. pattern (str): A single-character regex pattern specifying valid characters. Returns: list[tuple[int, str]]: Each tuple contains the character index and the invalid character. """ # List to store positions of invalid characters invalid_positions = [] # Iterate over the string, check each character for i, char in enumerate(s): if not re.match(pattern, char): # If the character does not match, record its position and value invalid_positions.append((i, char)) return invalid_positions def is_date_time_value_class(date_time_string) -> bool: """Check if the specified string is a valid datetime. Parameters: date_time_string (str): A datetime string. Returns: bool: True if the datetime string is valid. False, if otherwise. Notes: - ISO 8601 datetime string. """ try: date_time_obj = datetime.datetime.fromisoformat(date_time_string) return not date_time_obj.tzinfo except ValueError: return False def is_name_value_class(name_str) -> bool: """Return True if name_str is a valid HED name-value. Allowed characters are ASCII word characters (letters, digits, underscore), hyphens, and Unicode code points U+0080 through U+FFFF. Parameters: name_str (str): The string to validate. Returns: bool: True if the string matches the allowed pattern. """ pattern = r"^[\w\-\u0080-\uFFFF]+$" if re.fullmatch(pattern, name_str): return True else: return False def is_numeric_value_class(numeric_string) -> bool: """Check to see if valid numeric value. Parameters: numeric_string (str): A string that should be only a number with no units. Returns: bool: True if the numeric string is valid. False, if otherwise. """ if re.search(UnitValueValidator.DIGIT_OR_POUND_EXPRESSION, numeric_string): return True return False def is_text_value_class(text_string) -> bool: """Placeholder for eventual text value class validation. Parameters: text_string (str): Text class. Returns: bool: True """ return True def is_clock_face_time(time_string) -> bool: """Check if a valid HH:MM time string. Parameters: time_string (str): A time string. Returns: bool: True if the time string is valid. False, if otherwise. Notes: - This is deprecated and has no expected use going forward. """ try: time_obj = datetime.time.fromisoformat(time_string) return not time_obj.tzinfo and not time_obj.microsecond except ValueError: return False