Schema¶

HED schema management, loading, and validation functionality.

HedSchema¶

HedSchema ¶

Bases: HedSchemaBase

A HED schema suitable for processing.

Source code in hed/schema/hed_schema.py

class HedSchema(HedSchemaBase):
    """ A HED schema suitable for processing.

    The schema keeps its header attributes, prologue/epilogue text, an optional namespace prefix
    and one section object per HedSectionKey (tags, units, unit classes, etc.).
    """

    def __init__(self):
        """ Constructor for the HedSchema class.

            A HedSchema can be used for validation, checking tag attributes, parsing tags, etc.
        """
        super().__init__()
        self.header_attributes = {}
        self.filename = None
        self.prologue = ""
        self.epilogue = ""
        # Any additional data that might be needed for serialization (like OWL or other formats).
        self.extras = {}

        # This is the specified library name_prefix - tags will be {schema_namespace}:{tag_name}
        self._namespace = ""

        self._sections = self._create_empty_sections()
        # The type of file this was loaded from (mediawiki, xml, or owl - None if mixed).
        self.source_format = None

    # ===============================================
    # Basic schema properties
    # ===============================================
    @property
    def version_number(self) -> str:
        """ The HED version of this schema.

        Returns:
            str: The version of this schema.

        Notes:
            - Read directly from the 'version' header attribute, so a KeyError is raised if it is missing.
        """
        return self.header_attributes['version']

    @property
    def version(self) -> str:
        """ The complete schema version, including prefix and library name (if applicable).

        Returns:
            str: The complete schema version including library name and namespace.

        Notes:
            - Library names and version numbers are comma-separated lists that are paired up in order.
              Each pair becomes "{namespace}{version}" or "{namespace}{library}_{version}".
        """
        libraries = self.library.split(",")
        versions = self.version_number.split(",")
        namespace = self._namespace
        combined_versions = [f"{namespace}{version}" if not library else f"{namespace}{library}_{version}"
                             for library, version in zip(libraries, versions)]

        return ",".join(combined_versions)

    @property
    def library(self) -> str:
        """ The name of this library schema if one exists.

        Returns:
            str: Library name if any.
        """
        return self.header_attributes.get(LIBRARY_ATTRIBUTE, "")

    @property
    def schema_namespace(self) -> str:
        """ Returns the schema namespace prefix.

        Returns:
            str: The schema namespace prefix.
        """
        return self._namespace

    def can_save(self) -> bool:
        """ Returns whether it is legal to save this schema.

        You cannot save schemas loaded as merged from multiple library schemas.

        Returns:
            bool: True if this can be saved.
        """
        # A comma in the library name means several libraries were merged together.
        return not self.library or "," not in self.library

    @property
    def with_standard(self) -> str:
        """ The version of the base schema this is extended from, if it exists.

        Returns:
            str: HED version or empty string.
        """
        return self.header_attributes.get(WITH_STANDARD_ATTRIBUTE, "")

    @property
    def merged(self) -> bool:
        """ Returns whether this schema was loaded from a merged file.

        Returns:
            bool: True if file was loaded from a merged file.
        """
        # Merged is the absence (or emptiness) of the unmerged header attribute.
        return not self.header_attributes.get(UNMERGED_ATTRIBUTE, "")

    @property
    def tags(self) -> "HedSchemaTagSection":
        """ Return the tag schema section.

        Returns:
            HedSchemaTagSection: The tag section.
        """
        return self._sections[HedSectionKey.Tags]

    @property
    def unit_classes(self) -> "HedSchemaUnitClassSection":
        """ Return the unit classes schema section.

        Returns:
            HedSchemaUnitClassSection: The unit classes section.
        """
        return self._sections[HedSectionKey.UnitClasses]

    @property
    def units(self) -> "HedSchemaUnitSection":
        """ Return the unit schema section.

        Returns:
            HedSchemaUnitSection: The unit section.
        """
        return self._sections[HedSectionKey.Units]

    @property
    def unit_modifiers(self) -> "HedSchemaSection":
        """ Return the unit modifiers schema section.

        Returns:
            HedSchemaSection: The unit modifiers section.
        """
        return self._sections[HedSectionKey.UnitModifiers]

    @property
    def value_classes(self) -> "HedSchemaSection":
        """ Return the value classes schema section.

        Returns:
            HedSchemaSection: The value classes section.
        """
        return self._sections[HedSectionKey.ValueClasses]

    @property
    def attributes(self) -> "HedSchemaSection":
        """ Return the attributes schema section.

        Returns:
            HedSchemaSection: The attributes section.
        """
        return self._sections[HedSectionKey.Attributes]

    @property
    def properties(self) -> "HedSchemaSection":
        """ Return the properties schema section.

        Returns:
            HedSchemaSection: The properties section.
        """
        return self._sections[HedSectionKey.Properties]

    def get_schema_versions(self) -> list[str]:
        """ A list of HED version strings including namespace and library name if any of this schema.

        Returns:
            list[str]: A single-element list holding the formatted version of this schema.
        """
        return [self.get_formatted_version()]

    def get_formatted_version(self) -> str:
        """ The HED version string including namespace and library name if any of this schema.

        Returns:
            str: A json formatted string of the complete version of this schema including library name and namespace.
        """
        return json.dumps(self.version)

    def get_save_header_attributes(self, save_merged: bool = False) -> dict:
        """ Returns the attributes that should be saved.

        Parameters:
            save_merged (bool): Whether to save as merged schema.

        Returns:
            dict: The header attributes dictionary.

        Notes:
            - Attributes are sorted by name, except the version attribute which always comes first.
            - When not saving merged, the unmerged attribute is set to "True" and placed last.
        """
        # A run of "!" sorts ahead of any attribute name, which pins the version to the front.
        sort_to_start = "!!!!!!!!!!!!!!"
        header_attributes = dict(sorted(self.header_attributes.items(),
                                        key=lambda item: sort_to_start if item[0] == VERSION_ATTRIBUTE else item[0]))

        # Always drop any existing flag; re-adding it afterwards guarantees it is the last attribute.
        header_attributes.pop(UNMERGED_ATTRIBUTE, None)
        if not save_merged:
            header_attributes[UNMERGED_ATTRIBUTE] = "True"

        return header_attributes

    def schema_for_namespace(self, namespace: str) -> Union["HedSchema", None]:
        """ Return HedSchema object for this namespace.

        Parameters:
            namespace (str): The schema library name namespace.

        Returns:
            HedSchema or None: The HED schema object for this schema, or None if namespace doesn't match.
        """
        if self._namespace != namespace:
            return None
        return self

    @property
    def valid_prefixes(self) -> list[str]:
        """ Return a list of all prefixes this schema will accept.

        Returns:
            list[str]:   A list of valid tag prefixes for this schema.

        Notes:
            - The return value is always length 1 if using a HedSchema.
        """
        return [self._namespace]

    def get_extras(self, extras_key) -> Union[DataFrame, None]:
        """ Get the extras corresponding to the given key.

        Parameters:
            extras_key (str): The key to check for in the extras dictionary.

        Returns:
            Union[DataFrame, None]: The DataFrame for this extras key, or None if it doesn't exist or is empty.
        """
        # getattr guards against instances that were created without running __init__.
        extras = getattr(self, 'extras', None)
        if not extras or extras_key not in extras:
            return None
        externals = extras[extras_key]
        if externals is None or externals.empty:
            return None
        return externals

    # ===============================================
    # Creation and saving functions
    # ===============================================

    # todo: we may want to collapse these 6 functions into one like this
    # def serialize(self, filename=None, save_merged=False, file_format=whatever is default):
    #     pass

    def get_as_mediawiki_string(self, save_merged=False) -> str:
        """ Return the schema as a mediawiki string.

        Parameters:
            save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
                                If it is not a "withStandard" schema, this setting has no effect.

        Returns:
            str: The schema as a string in mediawiki format.

        """
        output_strings = Schema2Wiki().process_schema(self, save_merged)
        return '\n'.join(output_strings)

    def get_as_xml_string(self, save_merged=True) -> str:
        """ Return the schema as an XML string.

        Parameters:
            save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
                                If it is not a "withStandard" schema, this setting has no effect.

        Returns:
            str: The schema as an XML string.

        """
        xml_tree = Schema2XML().process_schema(self, save_merged)
        return schema_util.xml_element_2_str(xml_tree)

    def get_as_dataframes(self, save_merged=False) -> dict[str, DataFrame]:
        """ Get a dict of dataframes representing this file.

        Parameters:
            save_merged (bool): If True, returns DFs as if merged with standard.

        Returns:
            dict[str, DataFrame]: A dict of dataframes you can load as a schema.
        """
        return Schema2DF().process_schema(self, save_merged)

    def save_as_mediawiki(self, filename, save_merged=False):
        """ Save as mediawiki to a file.

        Parameters:
            filename (str): Save location.
            save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
                                If it is not a "withStandard" schema, this setting has no effect.


        Raises:
            OSError: File cannot be saved for some reason.
        """
        output_strings = Schema2Wiki().process_schema(self, save_merged)
        with open(filename, mode='w', encoding='utf-8') as opened_file:
            # Every line, including the last one, is terminated with a newline.
            opened_file.writelines(f"{string}\n" for string in output_strings)

    def save_as_xml(self, filename, save_merged=True):
        """ Save as XML to a file.

        Parameters:
            filename (str): Save location.
            save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
                                If it is not a "withStandard" schema, this setting has no effect.


        Raises:
            OSError: File cannot be saved for some reason.
        """
        xml_tree = Schema2XML().process_schema(self, save_merged)
        with open(filename, mode='w', encoding='utf-8') as opened_file:
            xml_string = schema_util.xml_element_2_str(xml_tree)
            opened_file.write(xml_string)

    def save_as_dataframes(self, base_filename, save_merged=False):
        """ Save as dataframes to a folder of files.

            If base_filename has a .tsv suffix, save directly to the indicated location.
            If base_filename is a directory(does NOT have a .tsv suffix), save the contents into a directory named that.
            The subfiles are named the same.  e.g. HED8.3.0/HED8.3.0_Tag.tsv

        Parameters:
            base_filename (str): Save filename. A suffix will be added to most, e.g. _Tag
            save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
                                If it is not a "withStandard" schema, this setting has no effect.


        Raises:
            OSError: File cannot be saved for some reason.
        """
        output_dfs = Schema2DF().process_schema(self, save_merged)
        # Extras are saved alongside the main dataframes; an extras key overrides a generated one.
        if getattr(self, 'extras', None):
            output_dfs.update(self.extras)
        df_util.save_dataframes(base_filename, output_dfs)

    def set_schema_prefix(self, schema_namespace):
        """ Set library namespace associated for this schema.

        Parameters:
            schema_namespace (str): Should be empty, or end with a colon. (A colon is added automatically if missing.)

        :raises HedFileError:
            - The prefix is invalid
        """
        if schema_namespace and schema_namespace[-1] != ":":
            schema_namespace += ":"

        # Everything before the trailing colon must be alphabetic.
        if schema_namespace and not schema_namespace[:-1].isalpha():
            raise HedFileError(HedExceptions.INVALID_LIBRARY_PREFIX,
                               "Schema namespace must contain only alpha characters",
                               self.filename)

        self._namespace = schema_namespace

    def __eq__(self, other):
        """ Return True if these schema match exactly.

        Parameters:
            other (HedSchema): The schema to be compared.

        Returns:
            bool: True if other exactly matches this schema.  NotImplemented if other is not a HedSchema.

        Notes:
            - Matches must include attributes, tag names, etc.
            - Prologue and epilogue are compared ignoring leading/trailing whitespace.

        """
        if other is None:
            return False
        if not isinstance(other, HedSchema):
            # Let Python try the reflected comparison instead of failing on a missing attribute.
            return NotImplemented
        if self.get_save_header_attributes() != other.get_save_header_attributes():
            return False
        if self.has_duplicates() != other.has_duplicates():
            return False
        if self.prologue.strip() != other.prologue.strip():
            return False
        if self.epilogue.strip() != other.epilogue.strip():
            return False
        # When debugging schema class changes, compare section.all_names pairwise here to find the mismatch.
        if self._sections != other._sections:
            return False
        if self._namespace != other._namespace:
            return False
        return True

    def __getitem__(self, section_key):
        """ Return the schema section stored under the given section key. """
        return self._sections[section_key]

    def check_compliance(self, check_for_warnings=True, name=None, error_handler=None) -> list[dict]:
        """ Check for HED3 compliance of this schema.

        Parameters:
            check_for_warnings (bool): If True, checks for formatting issues like invalid characters, capitalization.
            name (str): If present, use as the filename for context, rather than using the actual filename.
                        Useful for temp filenames when supporting web services.
            error_handler (ErrorHandler or None): Used to report errors.  Uses a default one if none passed in.

        Returns:
            list[dict]: A list of all warnings and errors found in the file. Each issue is a dictionary.
        """
        # Imported at call time rather than at module level.
        from hed.schema import schema_compliance
        return schema_compliance.check_compliance(self, check_for_warnings, name, error_handler)

    def get_tags_with_attribute(self, attribute, key_class=HedSectionKey.Tags) -> list["HedSchemaEntry"]:
        """ Return tag entries with the given attribute.

        Parameters:
            attribute (str): A tag attribute.  Eg HedKey.ExtensionAllowed
            key_class (HedSectionKey): The HedSectionKey for the section to retrieve from.

        Returns:
            list[HedSchemaEntry]: A list of all tags with this attribute.

        Notes:
            - The result is cached so will be fast after first call.
        """
        # NOTE(review): return_name_only=True is passed, so this presumably yields names rather than
        # entry objects - confirm against the section implementation and adjust the annotation if so.
        return self._sections[key_class].get_entries_with_attribute(attribute, return_name_only=True,
                                                                    schema_namespace=self._namespace)

    def get_tag_entry(self, name: str, key_class=HedSectionKey.Tags,
                      schema_namespace: str = "") -> Union["HedSchemaEntry", None]:
        """ Return the schema entry for this tag, if one exists.

        Parameters:
            name (str): Any form of basic tag(or other section entry) to look up.
                This will not handle extensions or similar.
                If this is a tag, it can have a schema namespace, but it's not required
            key_class (HedSectionKey or str):  The type of entry to return.
            schema_namespace (str): Only used on Tags.  If incorrect, will return None.

        Returns:
            HedSchemaEntry or None: The schema entry for the given tag, or None if not found.
        """
        if key_class == HedSectionKey.Tags:
            if schema_namespace != self._namespace:
                return None
            # Strip the namespace prefix if the caller included it in the name.
            if name.startswith(self._namespace):
                name = name[len(self._namespace):]

        return self._get_tag_entry(name, key_class)

    def find_tag_entry(self, tag,
                       schema_namespace="") -> tuple[Union["HedTagEntry", None], Union[str, None], list[dict]]:
        """ Find the schema entry for a given source tag.

        Parameters:
            tag (str, HedTag):     Any form of tag to look up.  Can have an extension, value, etc.
            schema_namespace (str):  The schema namespace of the tag, if any.

        Returns:
            tuple[Union["HedTagEntry", None], Union[str, None], list[dict]]:
            - The located tag entry for this tag.
            - The remainder of the tag that isn't part of the base tag.
            - A list of errors while converting.

        Notes:
            Works left to right (which is mostly relevant for errors).

        """
        if schema_namespace != self._namespace:
            validation_issues = ErrorHandler.format_error(ValidationErrors.HED_LIBRARY_UNMATCHED, tag,
                                                          schema_namespace, self.valid_prefixes)
            return None, None, validation_issues
        return self._find_tag_entry(tag, schema_namespace)

    # ===============================================
    # Private utility functions for getting/finding tags
    # ===============================================
    def _get_tag_entry(self, name, key_class=HedSectionKey.Tags):
        """ Return the schema entry for this tag, if one exists.

        Parameters:
            name (str): Any form of basic tag(or other section entry) to look up.
                This will not handle extensions or similar.
            key_class (HedSectionKey or str):  The type of entry to return.

        Returns:
            HedSchemaEntry: The schema entry for the given tag.

        """
        return self._sections[key_class].get(name)

    def _find_tag_entry(self, tag,
                        schema_namespace="") -> tuple[Union["HedTagEntry", None], Union[str, None], list[dict]]:
        """ Find the schema entry for a given source tag.

        Parameters:
            tag (str, HedTag):     Any form of tag to look up.  Can have an extension, value, etc.
            schema_namespace (str):  The schema namespace of the tag, if any.

        Returns:
            tuple[Union["HedTagEntry", None], Union[str, None], list[dict]]:
            - The located tag entry for this tag.
            - The remainder of the tag that isn't part of the base tag.
            - A list of errors while converting.

        Notes:
            Works left to right (which is mostly relevant for errors).

        """
        namespace = schema_namespace
        # The namespace is removed by length; the caller has already checked that it matches.
        clean_tag = str(tag)[len(namespace):]
        working_tag = clean_tag.casefold()

        # Most tags are in the schema directly, so test that first
        found_entry = self._get_tag_entry(working_tag)
        if found_entry:
            # this handles the one special case where the actual tag contains "/#" instead of something specific.
            remainder = working_tag[-2:] if working_tag.endswith("/#") else ""
            return found_entry, remainder, []

        # Error indexes are reported relative to the original tag, so offset them by the namespace length.
        prefix_tag_adj = len(namespace)

        try:
            found_entry, current_slash_index = self._find_tag_subfunction(tag, working_tag, prefix_tag_adj)
        except self._TagIdentifyError as e:
            return None, None, e.issue

        remainder = None
        if current_slash_index != -1:
            # Taken from clean_tag rather than working_tag so the remainder keeps its original case.
            remainder = clean_tag[current_slash_index:]
        if remainder and found_entry.takes_value_child_entry:
            found_entry = found_entry.takes_value_child_entry

        return found_entry, remainder, []

    def _find_tag_subfunction(self, tag, working_tag, prefix_tag_adj):
        """ Finds the base tag and remainder from the left, raising exception on issues.

        Parameters:
            tag (str, HedTag): The original tag, used only for error reporting.
            working_tag (str): The casefolded tag with the namespace removed.
            prefix_tag_adj (int): Offset added to indexes in reported errors.

        Returns:
            tuple: The longest matching entry and the index in working_tag where that match ends.

        :raises _TagIdentifyError:
            - No leading portion of the tag is a schema tag.
            - A term in the remainder already exists as a schema term.
        """
        current_slash_index = -1
        current_entry = None
        # Loop left to right, checking each word.  Once we find an invalid word, we stop.
        while True:
            next_index = working_tag.find("/", current_slash_index + 1)
            if next_index == -1:
                next_index = len(working_tag)
            parent_name = working_tag[:next_index]
            parent_entry = self._get_tag_entry(parent_name)

            if not parent_entry:
                # We haven't found any tag at all yet
                if current_entry is None:
                    error = ErrorHandler.format_error(ValidationErrors.NO_VALID_TAG_FOUND,
                                                      tag,
                                                      index_in_tag=prefix_tag_adj,
                                                      index_in_tag_end=prefix_tag_adj + next_index)
                    raise self._TagIdentifyError(error)
                # If this is not a takes value node, validate each term in the remainder.
                if not current_entry.takes_value_child_entry:
                    # This will raise _TagIdentifyError on any issues
                    self._validate_remaining_terms(tag, working_tag, prefix_tag_adj, current_slash_index)
                break

            current_entry = parent_entry
            current_slash_index = next_index
            if next_index == len(working_tag):
                break

        return current_entry, current_slash_index

    def _validate_remaining_terms(self, tag, working_tag, prefix_tag_adj, current_slash_index):
        """ Validates the terms past current_slash_index.

        Parameters:
            tag (str, HedTag): The original tag, used only for error reporting.
            working_tag (str): The casefolded tag with the namespace removed.
            prefix_tag_adj (int): Offset added to indexes in reported errors.
            current_slash_index (int): Index in working_tag of the slash that ends the matched base tag.

        :raises _TagIdentifyError:
            - One of the extension terms already exists as a schema term.
        """
        child_names = working_tag[current_slash_index + 1:].split("/")
        word_start_index = current_slash_index + 1 + prefix_tag_adj
        for name in child_names:
            if self._get_tag_entry(name):
                error = ErrorHandler.format_error(ValidationErrors.INVALID_PARENT_NODE,
                                                  tag,
                                                  index_in_tag=word_start_index,
                                                  index_in_tag_end=word_start_index + len(name),
                                                  expected_parent_tag=self.tags[name].name)
                raise self._TagIdentifyError(error)
            # Step past this term and the slash that follows it.
            word_start_index += len(name) + 1

    def has_duplicates(self):
        """ Returns the first duplicate tag/unit/etc. if any section has a duplicate name.

        Returns:
            The first duplicate name found in section order, or False if no section has duplicates.
        """
        for section in self._sections.values():
            if section.duplicate_names:
                # Return first entry of dict
                return next(iter(section.duplicate_names))

        return False

    # ===============================================
    # Semi-private creation finalizing functions
    # ===============================================
    def finalize_dictionaries(self):
        """ Call to finish loading. """
        # Kludge - Reset this here so it recalculates while having all properties
        self._schema83 = None
        self._update_all_entries()

    def _update_all_entries(self):
        """ Call finalize_entry on every schema entry(tag, unit, etc). """
        for key_class, section in self._sections.items():
            # Valid attributes must be in place before the section finalizes its entries.
            self._initialize_attributes(key_class)
            section._finalize_section(self)

    def _initialize_attributes(self, key_class):
        """ Set the valid attributes for a section.

        Parameters:
            key_class (HedSectionKey): The section key for the section to update.

        """
        self._sections[key_class].valid_attributes = self._get_attributes_for_section(key_class)

    # ===============================================
    # Getters used to write out schema primarily.
    # ===============================================
    def get_tag_attribute_names_old(self) -> dict[str, HedSchemaEntry]:
        """ Return a dict of all allowed tag attributes.

        Returns:
            dict[str, HedSchemaEntry]: A dictionary whose keys are attribute names and values are HedSchemaEntry object.

        Notes:
            - An attribute counts as a tag attribute when it has none of the old-style unit class, unit,
              unit modifier or value class properties.

        """
        return {tag_entry.name: tag_entry for tag_entry in self._sections[HedSectionKey.Attributes].values()
                if not tag_entry.has_attribute(HedKeyOld.UnitClassProperty)
                and not tag_entry.has_attribute(HedKeyOld.UnitProperty)
                and not tag_entry.has_attribute(HedKeyOld.UnitModifierProperty)
                and not tag_entry.has_attribute(HedKeyOld.ValueClassProperty)}

    # ===============================================
    # Private utility functions
    # ===============================================
    @staticmethod
    def _create_empty_sections():
        """ Create one empty section per HedSectionKey.

        Returns:
            dict: Section objects keyed by HedSectionKey.
        """
        # Insertion order is kept as-is because _update_all_entries walks the sections in this order.
        return {
            HedSectionKey.Properties: HedSchemaSection(HedSectionKey.Properties),
            HedSectionKey.Attributes: HedSchemaSection(HedSectionKey.Attributes),
            HedSectionKey.UnitModifiers: HedSchemaSection(HedSectionKey.UnitModifiers),
            HedSectionKey.Units: HedSchemaUnitSection(HedSectionKey.Units),
            HedSectionKey.UnitClasses: HedSchemaUnitClassSection(HedSectionKey.UnitClasses),
            HedSectionKey.ValueClasses: HedSchemaSection(HedSectionKey.ValueClasses),
            HedSectionKey.Tags: HedSchemaTagSection(HedSectionKey.Tags, case_sensitive=False),
        }

    def _get_modifiers_for_unit(self, unit):
        """ Return the valid modifiers for the given unit

        Parameters:
            unit (str): A known unit.

        Returns:
            derived_unit_list(list of HedSchemaEntry): The valid modifiers for this unit.
                Empty if the unit is unknown or is not an SI unit.

        Notes:
            This is a lower level one that doesn't rely on the Unit entries being fully setup.
        """
        unit_entry = self.get_tag_entry(unit, HedSectionKey.Units)
        if unit_entry is None:
            return []
        if not unit_entry.has_attribute(HedKey.SIUnit):
            return []
        # Unit symbols take symbol modifiers; other SI units take the regular modifiers.
        if unit_entry.has_attribute(HedKey.UnitSymbol):
            modifier_attribute_name = HedKey.SIUnitSymbolModifier
        else:
            modifier_attribute_name = HedKey.SIUnitModifier
        return self.unit_modifiers.get_entries_with_attribute(modifier_attribute_name)

    def _add_element_property_attributes(self, attribute_dict, attribute_name):
        """ Add every schema attribute that has the given attribute to attribute_dict (modified in place).

        Parameters:
            attribute_dict (dict): The dictionary to update.
            attribute_name (str): The attribute an entry must have to be included.
        """
        attributes = {attribute: entry for attribute, entry in self._sections[HedSectionKey.Attributes].items()
                      if entry.has_attribute(attribute_name)}

        attribute_dict.update(attributes)

    def _get_attributes_for_section(self, key_class):
        """ Return the valid attributes for this section.

        Parameters:
            key_class (HedSectionKey): The HedKey for this section.

        Returns:
            dict: A dict of all the attributes for this section.
        """
        element_prop_key = HedKey.ElementDomain if self.schema_83_props else HedKeyOld.ElementProperty

        # Common logic for Attributes and Properties
        if key_class in [HedSectionKey.Attributes, HedSectionKey.Properties]:
            prop_added_dict = {}
            if key_class == HedSectionKey.Attributes:
                # Attributes may also carry any of the schema properties.
                prop_added_dict = dict(self._sections[HedSectionKey.Properties].items())
            self._add_element_property_attributes(prop_added_dict, element_prop_key)
            return prop_added_dict

        if self.schema_83_props:
            attrib_classes = {
                HedSectionKey.UnitClasses: HedKey.UnitClassDomain,
                HedSectionKey.Units: HedKey.UnitDomain,
                HedSectionKey.UnitModifiers: HedKey.UnitModifierDomain,
                HedSectionKey.ValueClasses: HedKey.ValueClassDomain,
                HedSectionKey.Tags: HedKey.TagDomain
            }
        else:
            attrib_classes = {
                HedSectionKey.UnitClasses: HedKeyOld.UnitClassProperty,
                HedSectionKey.Units: HedKeyOld.UnitProperty,
                HedSectionKey.UnitModifiers: HedKeyOld.UnitModifierProperty,
                HedSectionKey.ValueClasses: HedKeyOld.ValueClassProperty
            }
            # Old-style schemas have no tag marker; tag attributes are those without any other marker.
            if key_class == HedSectionKey.Tags:
                return self.get_tag_attribute_names_old()

        # Retrieve attributes based on the determined class
        attrib_class = attrib_classes.get(key_class)
        if not attrib_class:
            # An empty dict keeps the return type consistent with every other branch.
            return {}

        attributes = {attribute: entry for attribute, entry in self._sections[HedSectionKey.Attributes].items()
                      if entry.has_attribute(attrib_class) or entry.has_attribute(element_prop_key)}
        return attributes

    # ===============================================
    # Semi private function used to create a schema in memory(usually from a source file)
    # ===============================================
    def _add_tag_to_dict(self, long_tag_name, new_entry, key_class):
        """ Add new_entry under long_tag_name to the section for key_class and return the section's result. """
        section = self._sections[key_class]
        return section._add_to_dict(long_tag_name, new_entry)

    def _create_tag_entry(self, long_tag_name, key_class):
        """ Ask the section for key_class to create an entry for long_tag_name and return it. """
        section = self._sections[key_class]
        return section._create_tag_entry(long_tag_name)

    class _TagIdentifyError(Exception):
        """Used internally to note when a tag cannot be identified."""
        def __init__(self, issue):
            # The formatted error that find/_find_tag_entry hands back to the caller.
            self.issue = issue

attributes `property` ¶

attributes: 'HedSchemaSection'

Return the attributes schema section.

Returns:

Name	Type	Description
`HedSchemaSection`	`'HedSchemaSection'`	The attributes section.

library `property` ¶

library: str

The name of this library schema if one exists.

Returns:

Name	Type	Description
`str`	`str`	Library name if any.

merged `property` ¶

merged: bool

Returns whether this schema was loaded from a merged file.

Returns:

Name	Type	Description
`bool`	`bool`	True if file was loaded from a merged file.

properties `property` ¶

properties: 'HedSchemaSection'

Return the properties schema section.

Returns:

Name	Type	Description
`HedSchemaSection`	`'HedSchemaSection'`	The properties section.

schema_namespace `property` ¶

schema_namespace: str

Returns the schema namespace prefix.

Returns:

Name	Type	Description
`str`	`str`	The schema namespace prefix.

tags `property` ¶

tags: 'HedSchemaTagSection'

Return the tag schema section.

Returns:

Name	Type	Description
`HedSchemaTagSection`	`'HedSchemaTagSection'`	The tag section.

unit_classes `property` ¶

unit_classes: 'HedSchemaUnitClassSection'

Return the unit classes schema section.

Returns:

Name	Type	Description
`HedSchemaUnitClassSection`	`'HedSchemaUnitClassSection'`	The unit classes section.

unit_modifiers `property` ¶

unit_modifiers: 'HedSchemaSection'

Return the unit modifiers schema section.

Returns:

Name	Type	Description
`HedSchemaSection`	`'HedSchemaSection'`	The unit modifiers section.

units `property` ¶

units: 'HedSchemaUnitSection'

Return the unit schema section.

Returns:

Name	Type	Description
`HedSchemaUnitSection`	`'HedSchemaUnitSection'`	The unit section.

valid_prefixes `property` ¶

valid_prefixes: list[str]

Return a list of all prefixes this schema will accept.

Returns:

Type	Description
`list[str]`	A list of valid tag prefixes for this schema.

Notes

The return value is always length 1 if using a HedSchema.

value_classes `property` ¶

value_classes: 'HedSchemaSection'

Return the value classes schema section.

Returns:

Name	Type	Description
`HedSchemaSection`	`'HedSchemaSection'`	The value classes section.

version `property` ¶

version: str

The complete schema version, including prefix and library name (if applicable).

Returns:

Name	Type	Description
`str`	`str`	The complete schema version including library name and namespace.

version_number `property` ¶

version_number: str

The HED version of this schema.

Returns:

Name	Type	Description
`str`	`str`	The version of this schema.

with_standard `property` ¶

with_standard: str

The version of the base schema this is extended from, if it exists.

Returns:

Name	Type	Description
`str`	`str`	HED version or empty string.

can_save ¶

can_save() -> bool

Returns whether it is legal to save this schema.

You cannot save schemas loaded as merged from multiple library schemas.

Returns:

Name	Type	Description
`bool`	`bool`	True if this can be saved.

Source code in hed/schema/hed_schema.py

def can_save(self) -> bool:
    """ Report whether this schema may legally be saved.

    A schema that was loaded as a merge of several library schemas (its library
    string contains a comma) cannot be saved.

    Returns:
        bool: True if this can be saved.
    """
    library_names = self.library
    if not library_names:
        # No library name at all - nothing prevents saving.
        return True
    # A comma means more than one library was merged into this schema.
    return "," not in library_names

check_compliance ¶

check_compliance(
    check_for_warnings=True, name=None, error_handler=None
) -> list[dict]

Check for HED3 compliance of this schema.

Parameters:

Name	Type	Description	Default
`check_for_warnings`	`bool`	If True, checks for formatting issues like invalid characters, capitalization.	`True`
`name`	`str`	If present, use as the filename for context, rather than using the actual filename. Useful for temp filenames when supporting web services.	`None`
`error_handler`	`ErrorHandler or None`	Used to report errors. Uses a default one if none passed in.	`None`

Returns:

Type	Description
`list[dict]`	list[dict]: A list of all warnings and errors found in the file. Each issue is a dictionary.

Source code in hed/schema/hed_schema.py

def check_compliance(self, check_for_warnings=True, name=None, error_handler=None) -> list[dict]:
    """ Run the HED3 compliance checks against this schema.

    Parameters:
        check_for_warnings (bool): If True, also checks for formatting issues such as invalid characters
                                   and capitalization.
        name (str): If present, used as the filename for context instead of the actual filename.
                    Useful for temp filenames when supporting web services.
        error_handler (ErrorHandler or None): Used to report errors.  A default one is used if none is passed in.

    Returns:
        list[dict]: A list of all warnings and errors found in the file. Each issue is a dictionary.
    """
    # Imported at call time rather than at module level
    # (presumably to avoid a circular import - confirm).
    from hed.schema import schema_compliance

    issues = schema_compliance.check_compliance(self, check_for_warnings, name, error_handler)
    return issues

finalize_dictionaries ¶

finalize_dictionaries()

Call to finish loading.

Source code in hed/schema/hed_schema.py

def finalize_dictionaries(self):
    """ Call to finish loading.

    Resets ``_schema83`` to None and then calls ``_update_all_entries()``.
    The reset happens first so that, per the note below, the value is
    recalculated once all properties are available.
    """
    # Kludge - Reset this here so it recalculates while having all properties
    self._schema83 = None
    self._update_all_entries()

find_tag_entry ¶

find_tag_entry(
    tag, schema_namespace=""
) -> tuple[
    Union["HedTagEntry", None], Union[str, None], list[dict]
]

Find the schema entry for a given source tag.

Parameters:

Name	Type	Description	Default
`tag`	`(str, HedTag)`	Any form of tag to look up. Can have an extension, value, etc.	required
`schema_namespace`	`str`	The schema namespace of the tag, if any.	`''`

Returns:

Type	Description
`tuple[Union['HedTagEntry', None], Union[str, None], list[dict]]`	A tuple of three items: the located tag entry for this tag (or None), the remainder of the tag that isn't part of the base tag (or None), and a list of errors found while converting.

Notes

Works left to right (which is mostly relevant for errors).

Source code in hed/schema/hed_schema.py

def find_tag_entry(self, tag, schema_namespace="") -> tuple[Union["HedTagEntry", None], Union[str, None], list[dict]]:
    """ Look up the schema entry that corresponds to a source tag.

    Parameters:
        tag (str, HedTag):     Any form of tag to look up.  Can have an extension, value, etc.
        schema_namespace (str):  The schema namespace of the tag, if any.

    Returns:
        tuple[Union["HedTagEntry", None], Union[str, None], list[dict]]:
        - The located tag entry for this tag.
        - The remainder of the tag that isn't part of the base tag.
        - A list of errors while converting.

    Notes:
        Works left to right (which is mostly relevant for errors).

    """
    if schema_namespace == self._namespace:
        # Namespace matches this schema - delegate the actual lookup.
        return self._find_tag_entry(tag, schema_namespace)

    # Wrong namespace: nothing is looked up, only the mismatch is reported.
    namespace_issues = ErrorHandler.format_error(ValidationErrors.HED_LIBRARY_UNMATCHED, tag,
                                                 schema_namespace, self.valid_prefixes)
    return None, None, namespace_issues

get_as_dataframes ¶

get_as_dataframes(save_merged=False) -> dict[DataFrame]

Get a dict of dataframes representing this file

Parameters:

Name	Type	Description	Default
`save_merged`	`bool`	If True, returns DFs as if merged with standard.	`False`

Returns:

Type	Description
`dict[DataFrame]`	dict[DataFrame]: A dict of dataframes you can load as a schema.

Source code in hed/schema/hed_schema.py

def get_as_dataframes(self, save_merged=False) -> dict[DataFrame]:
    """ Convert this schema into a dict of dataframes.

    Parameters:
        save_merged (bool): If True, returns DFs as if merged with standard.

    Returns:
        dict[DataFrame]: A dict of dataframes you can load as a schema.
    """
    converter = Schema2DF()
    return converter.process_schema(self, save_merged)

get_as_mediawiki_string ¶

get_as_mediawiki_string(save_merged=False) -> str

Return the schema to a mediawiki string.

Parameters:

Name	Type	Description	Default
`save_merged`	`bool`	If True, this will save the schema as a merged schema if it is a "withStandard" schema. If it is not a "withStandard" schema, this setting has no effect.	`False`

Returns:

Name	Type	Description
`str`	`str`	The schema as a string in mediawiki format.

Source code in hed/schema/hed_schema.py

def get_as_mediawiki_string(self, save_merged=False) -> str:
    """ Serialize the schema to a single mediawiki string.

    Parameters:
        save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
                            If it is not a "withStandard" schema, this setting has no effect.

    Returns:
        str: The schema as a string in mediawiki format.

    """
    converter = Schema2Wiki()
    wiki_lines = converter.process_schema(self, save_merged)
    # The converter's output is joined with newlines into one string.
    return '\n'.join(wiki_lines)

get_as_xml_string ¶

get_as_xml_string(save_merged=True) -> str

Return the schema to an XML string.

Parameters:

Name	Type	Description	Default
`save_merged`	`bool`	If True, this will save the schema as a merged schema if it is a "withStandard" schema. If it is not a "withStandard" schema, this setting has no effect.	`True`

Returns:

Name	Type	Description
`str`	`str`	The schema as an XML string.

Source code in hed/schema/hed_schema.py

def get_as_xml_string(self, save_merged=True) -> str:
    """ Serialize the schema to an XML string.

    Parameters:
        save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
                            If it is not a "withStandard" schema, this setting has no effect.

    Returns:
        str: The schema as an XML string.

    """
    converter = Schema2XML()
    # Build the XML tree first, then render it to text.
    return schema_util.xml_element_2_str(converter.process_schema(self, save_merged))

get_extras ¶

get_extras(extras_key) -> Union[DataFrame, None]

Get the extras corresponding to the given key

Parameters:

Name	Type	Description	Default
`extras_key`	`str`	The key to check for in the extras dictionary.	required

Returns:

Type	Description
`Union[DataFrame, None]`	Union[DataFrame, None]: The DataFrame for this extras key, or None if it doesn't exist or is empty.

Source code in hed/schema/hed_schema.py

def get_extras(self, extras_key) -> Union[DataFrame, None]:
    """ Look up the extras DataFrame stored under the given key.

    Parameters:
        extras_key (str): The key to check for in the extras dictionary.

    Returns:
        Union[DataFrame, None]: The DataFrame for this extras key, or None if it doesn't exist or is empty.
    """
    # A private sentinel distinguishes "no extras attribute" from any real value.
    missing = object()
    extras = getattr(self, 'extras', missing)
    if extras is missing or extras_key not in extras:
        return None

    frame = extras[extras_key]
    # An empty DataFrame is treated the same as a missing one.
    return None if frame.empty else frame

get_formatted_version ¶

get_formatted_version() -> str

The HED version string of this schema, including namespace and library name, if any.

Returns:

Name	Type	Description
`str`	`str`	A json formatted string of the complete version of this schema including library name and namespace.

Source code in hed/schema/hed_schema.py

def get_formatted_version(self) -> str:
    """ The HED version string of this schema, including namespace and library name, if any.

    Returns:
        str: A json formatted string of the complete version of this schema including library name and namespace.
    """
    complete_version = self.version
    return json.dumps(complete_version)

get_save_header_attributes ¶

get_save_header_attributes(
    save_merged: bool = False,
) -> dict

Returns the attributes that should be saved.

Parameters:

Name	Type	Description	Default
`save_merged`	`bool`	Whether to save as merged schema.	`False`

Returns:

Name	Type	Description
`dict`	`dict`	The header attributes dictionary.

Source code in hed/schema/hed_schema.py

def get_save_header_attributes(self, save_merged: bool = False) -> dict:
    """ Build the header attributes that should be written when saving.

    The attributes are sorted by name, with the version attribute forced to the front.
    The unmerged attribute is dropped when saving merged; otherwise it is (re)added as
    the last attribute with the value "True".

    Parameters:
        save_merged (bool): Whether to save as merged schema.

    Returns:
        dict: The header attributes dictionary.
    """
    sort_to_start = "!!!!!!!!!!!!!!"

    def _order_key(item):
        # The version attribute gets a key chosen to sort ahead of the attribute names.
        attribute_name = item[0]
        return sort_to_start if attribute_name == VERSION_ATTRIBUTE else attribute_name

    ordered_attributes = dict(sorted(self.header_attributes.items(), key=_order_key))

    # Any existing unmerged flag is removed in both cases ...
    ordered_attributes.pop(UNMERGED_ATTRIBUTE, None)
    if not save_merged:
        # ... and re-inserted at the end so it lands in a predictable position.
        ordered_attributes[UNMERGED_ATTRIBUTE] = "True"

    return ordered_attributes

get_schema_versions ¶

get_schema_versions() -> list[str]

A list of HED version strings for this schema, including namespace and library name, if any.

Returns:

Type	Description
`list[str]`	list[str]: The complete version of this schema including library name and namespace.

Source code in hed/schema/hed_schema.py

def get_schema_versions(self) -> list[str]:
    """ A list of HED version strings for this schema, including namespace and library name, if any.

    Returns:
        list[str]: The complete version of this schema including library name and namespace.
    """
    # The result is a one-element list holding this schema's formatted version.
    formatted_version = self.get_formatted_version()
    return [formatted_version]

get_tag_attribute_names_old ¶

get_tag_attribute_names_old() -> dict[str, HedSchemaEntry]

Return a dict of all allowed tag attributes.

Returns:

Type	Description
`dict[str, HedSchemaEntry]`	dict[str, HedSchemaEntry]: A dictionary whose keys are attribute names and values are HedSchemaEntry object.

Source code in hed/schema/hed_schema.py

def get_tag_attribute_names_old(self) -> dict[str, HedSchemaEntry]:
    """ Collect the attribute entries that are allowed on tags.

    An entry from the attributes section is kept only if it carries none of the unit class,
    unit, unit modifier, or value class properties.

    Returns:
        dict[str, HedSchemaEntry]: A dictionary whose keys are attribute names and values are HedSchemaEntry object.

    """
    # Properties that mark an attribute as belonging to a non-tag section.
    excluded_properties = (
        HedKeyOld.UnitClassProperty,
        HedKeyOld.UnitProperty,
        HedKeyOld.UnitModifierProperty,
        HedKeyOld.ValueClassProperty,
    )

    tag_attributes = {}
    for attribute_entry in self._sections[HedSectionKey.Attributes].values():
        if any(attribute_entry.has_attribute(prop) for prop in excluded_properties):
            continue
        tag_attributes[attribute_entry.name] = attribute_entry
    return tag_attributes

get_tag_entry ¶

get_tag_entry(
    name: str,
    key_class=HedSectionKey.Tags,
    schema_namespace: str = "",
) -> Union["HedSchemaEntry", None]

Return the schema entry for this tag, if one exists.

Parameters:

Name	Type	Description	Default
`name`	`str`	Any form of basic tag(or other section entry) to look up. This will not handle extensions or similar. If this is a tag, it can have a schema namespace, but it's not required	required
`key_class`	`HedSectionKey or str`	The type of entry to return.	`Tags`
`schema_namespace`	`str`	Only used on Tags. If incorrect, will return None.	`''`

Returns:

Type	Description
`Union['HedSchemaEntry', None]`	HedSchemaEntry or None: The schema entry for the given tag, or None if not found.

Source code in hed/schema/hed_schema.py

def get_tag_entry(self, name: str, key_class=HedSectionKey.Tags, schema_namespace: str = "") -> Union["HedSchemaEntry", None]:
    """ Fetch the schema entry for a tag (or other section entry), if one exists.

    Parameters:
        name (str): Any form of basic tag(or other section entry) to look up.
            This will not handle extensions or similar.
            If this is a tag, it can have a schema namespace, but it's not required
        key_class (HedSectionKey or str):  The type of entry to return.
        schema_namespace (str): Only used on Tags.  If incorrect, will return None.

    Returns:
        HedSchemaEntry or None: The schema entry for the given tag, or None if not found.
    """
    if key_class != HedSectionKey.Tags:
        # Namespaces only apply to tags; other sections are looked up as-is.
        return self._get_tag_entry(name, key_class)

    own_namespace = self._namespace
    if schema_namespace != own_namespace:
        return None

    # Strip this schema's namespace prefix from the name when it is present.
    if name.startswith(own_namespace):
        name = name[len(own_namespace):]
    return self._get_tag_entry(name, key_class)

get_tags_with_attribute ¶

get_tags_with_attribute(
    attribute, key_class=HedSectionKey.Tags
) -> list["HedSchemaEntry"]

Return tag entries with the given attribute.

Parameters:

Name	Type	Description	Default
`attribute`	`str`	A tag attribute. Eg HedKey.ExtensionAllowed	required
`key_class`	`HedSectionKey`	The HedSectionKey for the section to retrieve from.	`Tags`

Returns:

Type	Description
`list['HedSchemaEntry']`	list[HedSchemaEntry]: A list of all tags with this attribute.

Notes

The result is cached so will be fast after first call.

Source code in hed/schema/hed_schema.py

def get_tags_with_attribute(self, attribute, key_class=HedSectionKey.Tags) -> list["HedSchemaEntry"]:
    """ Return the entries of a section that carry the given attribute.

    Parameters:
        attribute (str): A tag attribute.  Eg HedKey.ExtensionAllowed
        key_class (HedSectionKey): The HedSectionKey for the section to retrieve from.

    Returns:
        list[HedSchemaEntry]: A list of all tags with this attribute.

    Notes:
        - The result is cached so will be fast after first call.
        - NOTE(review): the section is queried with return_name_only=True, so the result presumably
          holds names rather than entry objects - confirm against the section implementation.
    """
    section = self._sections[key_class]
    return section.get_entries_with_attribute(attribute, return_name_only=True,
                                              schema_namespace=self._namespace)

has_duplicates ¶

has_duplicates()

Returns the first duplicate tag/unit/etc. if any section has a duplicate name

Source code in hed/schema/hed_schema.py

def has_duplicates(self):
    """ Return the first duplicate tag/unit/etc. name found in any section, or False if there is none. """
    for schema_section in self._sections.values():
        duplicates = schema_section.duplicate_names
        if duplicates:
            # Only the first name of the first section with duplicates is reported.
            return next(iter(duplicates))

    return False

save_as_dataframes ¶

save_as_dataframes(base_filename, save_merged=False)

Save as dataframes to a folder of files.

If base_filename has a .tsv suffix, save directly to the indicated location.
If base_filename is a directory(does NOT have a .tsv suffix), save the contents into a directory named that.
The subfiles are named the same.  e.g. HED8.3.0/HED8.3.0_Tag.tsv

Parameters:

Name	Type	Description	Default
`base_filename`	`str`	Save filename. A suffix will be added to most, e.g. _Tag	required
`save_merged`	`bool`	If True, this will save the schema as a merged schema if it is a "withStandard" schema. If it is not a "withStandard" schema, this setting has no effect.	`False`

Raises:

Type	Description
`OSError`	File cannot be saved for some reason.

Source code in hed/schema/hed_schema.py

def save_as_dataframes(self, base_filename, save_merged=False):
    """ Write the schema out as a set of dataframe (.tsv) files.

        If base_filename has a .tsv suffix, save directly to the indicated location.
        If base_filename is a directory(does NOT have a .tsv suffix), save the contents into a directory named that.
        The subfiles are named the same.  e.g. HED8.3.0/HED8.3.0_Tag.tsv

    Parameters:
        base_filename (str): Save filename. A suffix will be added to most, e.g. _Tag
        save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
                            If it is not a "withStandard" schema, this setting has no effect.


    Raises:
        OSError: File cannot be saved for some reason.
    """
    frames = Schema2DF().process_schema(self, save_merged)

    # Any stored extras are merged into the frames before saving.
    extra_frames = getattr(self, 'extras', None)
    if extra_frames:
        frames.update(extra_frames)

    df_util.save_dataframes(base_filename, frames)

save_as_mediawiki ¶

save_as_mediawiki(filename, save_merged=False)

Save as mediawiki to a file.

Parameters:

Name	Type	Description	Default
`filename`	`str`	Save location.	required
`save_merged`	`bool`	If True, this will save the schema as a merged schema if it is a "withStandard" schema. If it is not a "withStandard" schema, this setting has no effect.	`False`

Raises:

Type	Description
`OSError`	File cannot be saved for some reason.

Source code in hed/schema/hed_schema.py

def save_as_mediawiki(self, filename, save_merged=False):
    """ Write the schema to a file in mediawiki format.

    Parameters:
        filename (str): Save location.
        save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
                            If it is not a "withStandard" schema, this setting has no effect.


    Raises:
        OSError: File cannot be saved for some reason.
    """
    wiki_lines = Schema2Wiki().process_schema(self, save_merged)
    with open(filename, mode='w', encoding='utf-8') as wiki_file:
        # Every output line is terminated with a newline, including the last one.
        wiki_file.writelines(line + '\n' for line in wiki_lines)

save_as_xml ¶

save_as_xml(filename, save_merged=True)

Save as XML to a file.

Parameters:

Name	Type	Description	Default
`filename`	`str`	Save location.	required
`save_merged`	`bool`	If true, this will save the schema as a merged schema if it is a "withStandard" schema. If it is not a "withStandard" schema, this setting has no effect.	`True`

Raises:

Type	Description
`OSError`	File cannot be saved for some reason.

Source code in hed/schema/hed_schema.py

def save_as_xml(self, filename, save_merged=True):
    """ Write the schema to a file in XML format.

    Parameters:
        filename (str): Save location.
        save_merged (bool): If true, this will save the schema as a merged schema if it is a "withStandard" schema.
                            If it is not a "withStandard" schema, this setting has no effect.


    Raises:
        OSError: File cannot be saved for some reason.
    """
    schema_tree = Schema2XML().process_schema(self, save_merged)
    with open(filename, mode='w', encoding='utf-8') as xml_file:
        # The tree is rendered to text and written in a single call.
        xml_file.write(schema_util.xml_element_2_str(schema_tree))

schema_for_namespace ¶

schema_for_namespace(
    namespace: str,
) -> Union["HedSchema", None]

Return HedSchema object for this namespace.

Parameters:

Name	Type	Description	Default
`namespace`	`str`	The schema library name namespace.	required

Returns:

Type	Description
`Union['HedSchema', None]`	HedSchema or None: The HED schema object for this schema, or None if namespace doesn't match.

Source code in hed/schema/hed_schema.py

def schema_for_namespace(self, namespace: str) -> Union["HedSchema", None]:
    """ Return this schema if it serves the given namespace.

    Parameters:
        namespace (str): The schema library name namespace.

    Returns:
        HedSchema or None: The HED schema object for this schema, or None if namespace doesn't match.
    """
    return self if self._namespace == namespace else None

set_schema_prefix ¶

set_schema_prefix(schema_namespace)

Set library namespace associated for this schema.

Parameters:

Name	Type	Description	Default
`schema_namespace`	`str`	Should be empty, or end with a colon. (A colon will be added automatically if missing.)	required

:raises HedFileError: - The prefix is invalid

Source code in hed/schema/hed_schema.py

def set_schema_prefix(self, schema_namespace):
    """ Set the library namespace associated with this schema.

    Parameters:
        schema_namespace (str or None): Should be empty, or end with a colon.
            A colon is added automatically if missing.  None (or any other falsy value)
            is treated as the empty namespace.

    :raises HedFileError:
        - The prefix is invalid
    """
    if not schema_namespace:
        # Always store a string: the namespace is compared against "" and formatted into
        # version strings elsewhere, so storing None would break those comparisons.
        self._namespace = ""
        return

    if not schema_namespace.endswith(":"):
        schema_namespace += ":"

    # Everything before the trailing colon must be purely alphabetic (and non-empty).
    if not schema_namespace[:-1].isalpha():
        raise HedFileError(HedExceptions.INVALID_LIBRARY_PREFIX,
                           "Schema namespace must contain only alpha characters",
                           self.filename)

    self._namespace = schema_namespace

Schema I/O¶

hed_schema_io ¶

Utilities for loading and outputting HED schema.

from_dataframes ¶

from_dataframes(
    schema_data, schema_namespace=None, name=None
) -> "HedSchema"

Create a schema from the given dataframes.

Parameters:

Name	Type	Description	Default
`schema_data`	`dict`	A dict of DF_SUFFIXES:file_as_string_or_df Should have an entry for all values of DF_SUFFIXES.	required
`schema_namespace`	`(str, None)`	The name_prefix all tags in this schema will accept.	`None`
`name`	`str or None`	User supplied identifier for this schema	`None`

Returns:

Name	Type	Description
`HedSchema`	`'HedSchema'`	The loaded schema.

:raises HedFileError: - Empty/invalid parameters

Notes

The loading is determined by file type.

Source code in hed/schema/hed_schema_io.py

def from_dataframes(schema_data, schema_namespace=None, name=None) -> 'HedSchema':
    """ Create a schema from a dict of dataframes (or their string forms).

    Parameters:
        schema_data (dict): A dict of DF_SUFFIXES:file_as_string_or_df
                              Should have an entry for all values of DF_SUFFIXES.
        schema_namespace (str, None):  The name_prefix all tags in this schema will accept.
        name (str or None): User supplied identifier for this schema

    Returns:
        HedSchema:  The loaded schema.

    :raises HedFileError:
        - Empty/invalid parameters

    """
    # Only a non-empty dict is accepted as input.
    if not (schema_data and isinstance(schema_data, dict)):
        raise HedFileError(HedExceptions.BAD_PARAMETERS, "Empty or non dict value passed to HedSchema.from_dataframes",
                           filename=name)

    loaded_schema = SchemaLoaderDF.load_spreadsheet(schema_as_strings_or_df=schema_data, name=name)

    # The namespace is applied only when one was actually supplied.
    if schema_namespace:
        loaded_schema.set_schema_prefix(schema_namespace=schema_namespace)

    return loaded_schema

from_string ¶

from_string(
    schema_string,
    schema_format=".xml",
    schema_namespace=None,
    schema=None,
    name=None,
)

Create a schema from the given string.

Parameters:

Name	Type	Description	Default
`schema_string`	`str`	An XML or mediawiki file as a single long string	required
`schema_format`	`str`	The schema format of the source schema string. Allowed normal values: .mediawiki, .xml	`'.xml'`
`schema_namespace`	`(str, None)`	The name_prefix all tags in this schema will accept.	`None`
`schema`	`HedSchema or None`	A HED schema to merge this new file into. It must be a with-standard schema with the same value.	`None`
`name`	`str or None`	User supplied identifier for this schema	`None`

Returns:

Type	Description
`HedSchema`	The loaded schema.

:raises HedFileError: - If empty string or invalid extension is passed. - Other fatal formatting issues with file

Notes

The loading is determined by file type.

Source code in hed/schema/hed_schema_io.py

def from_string(schema_string, schema_format=".xml", schema_namespace=None, schema=None, name=None):
    """ Create a schema from a string holding an XML or mediawiki schema.

    Parameters:
        schema_string (str): An XML or mediawiki file as a single long string
        schema_format (str):         The schema format of the source schema string.
            Allowed normal values: .mediawiki, .xml
        schema_namespace (str, None):  The name_prefix all tags in this schema will accept.
        schema (HedSchema or None): A HED schema to merge this new file into.
                                   It must be a with-standard schema with the same value.
        name (str or None): User supplied identifier for this schema

    Returns:
        (HedSchema):  The loaded schema.

    :raises HedFileError:
        - If empty string or invalid extension is passed.
        - Other fatal formatting issues with file

    Notes:
        - The loader is chosen from the ending of schema_format.

    """
    if not schema_string:
        raise HedFileError(HedExceptions.BAD_PARAMETERS, "Empty string passed to HedSchema.from_string",
                           filename=name)

    if isinstance(schema_string, str):
        # Normalize Windows line endings, since the caller might not have done so.
        schema_string = schema_string.replace("\r\n", "\n")

    # Checked in order; the first matching suffix selects the loader.
    loaders_by_suffix = ((".xml", SchemaLoaderXML), (".mediawiki", SchemaLoaderWiki))
    for suffix, loader in loaders_by_suffix:
        if schema_format.endswith(suffix):
            loaded_schema = loader.load(schema_as_string=schema_string, schema=schema, name=name)
            break
    else:
        raise HedFileError(HedExceptions.INVALID_EXTENSION, f"Unknown schema extension {schema_format}", filename=name)

    if schema_namespace:
        loaded_schema.set_schema_prefix(schema_namespace=schema_namespace)
    return loaded_schema

get_hed_xml_version ¶

get_hed_xml_version(xml_file_path) -> str

Get the version number from a HED XML file.

Parameters:

Name	Type	Description	Default
`xml_file_path`	`str`	The path to a HED XML file.	required

Returns:

Name	Type	Description
`str`	`str`	The version number of the HED XML file.

:raises HedFileError: - There is an issue loading the schema

Source code in hed/schema/hed_schema_io.py

def get_hed_xml_version(xml_file_path) -> str:
    """ Read the version of a HED XML file.

    Parameters:
        xml_file_path (str): The path to a HED XML file.

    Returns:
        str: The version number of the HED XML file.

    :raises HedFileError:
        - There is an issue loading the schema
    """
    # The value returned is the ``version`` attribute of the schema held by the XML loader.
    xml_loader = SchemaLoaderXML(xml_file_path)
    loaded_schema = xml_loader.schema
    return loaded_schema.version

load_schema ¶

load_schema(
    hed_path, schema_namespace=None, schema=None, name=None
) -> "HedSchema"

Load a schema from the given file or URL path.

Parameters:

Name	Type	Description	Default
`hed_path`	`str`	A filepath or url to open a schema from. If loading a TSV file, this should be a single filename where: Template: basename.tsv, where files are named basename_Struct.tsv, basename_Tag.tsv, etc. Alternatively, you can point to a directory containing the .tsv files.	required
`schema_namespace`	`str or None`	The name_prefix all tags in this schema will accept.	`None`
`schema`	`HedSchema or None`	A HED schema to merge this new file into. It must be a with-standard schema with the same value.	`None`
`name`	`str or None`	User supplied identifier for this schema	`None`

Returns:

Name	Type	Description
`HedSchema`	`'HedSchema'`	The loaded schema.

:raises HedFileError: - Empty path passed - Unknown extension - Any fatal issues when loading the schema.

Source code in hed/schema/hed_schema_io.py

def load_schema(hed_path, schema_namespace=None, schema=None, name=None) -> 'HedSchema':
    """ Load a schema from the given file or URL path.

    Parameters:
        hed_path (str): A filepath or url to open a schema from.
            If loading a TSV file, this should be a single filename where:
            Template: basename.tsv, where files are named basename_Struct.tsv, basename_Tag.tsv, etc.
            Alternatively, you can point to a directory containing the .tsv files.
        schema_namespace (str or None): The name_prefix all tags in this schema will accept.
        schema (HedSchema or None): A HED schema to merge this new file into.
                                   It must be a with-standard schema with the same value.
                                   Not supported when loading from .tsv files or a directory.
        name (str or None): User supplied identifier for this schema

    Returns:
        HedSchema: The loaded schema.

    :raises HedFileError:
        - Empty path passed
        - Unknown extension
        - A schema to merge into was passed while loading from .tsv files
        - The URL could not be read
        - Any fatal issues when loading the schema.

    """
    if not hed_path:
        raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file path passed to HedSchema.load_file",
                           filename=hed_path)

    is_url = hed_cache._check_if_url(hed_path)
    if is_url:
        try:
            file_as_string = schema_util.url_to_string(hed_path)
        except URLError as e:
            raise HedFileError(HedExceptions.URL_ERROR, str(e), hed_path) from e
        # The format is taken from the URL's extension.  The schema to merge into is forwarded
        # so that URL loading honors it the same way the local .xml/.mediawiki branches do.
        hed_schema = from_string(file_as_string, schema_format=os.path.splitext(hed_path.lower())[1],
                                 schema=schema, name=name)
    elif hed_path.lower().endswith(".xml"):
        hed_schema = SchemaLoaderXML.load(hed_path, schema=schema, name=name)
    elif hed_path.lower().endswith(".mediawiki"):
        hed_schema = SchemaLoaderWiki.load(hed_path, schema=schema, name=name)
    elif hed_path.lower().endswith(".tsv") or os.path.isdir(hed_path):
        # Merging into an existing schema is not available for spreadsheet loading.
        if schema is not None:
            raise HedFileError(HedExceptions.INVALID_HED_FORMAT,
                               "Cannot pass a schema to merge into spreadsheet loading currently.", filename=name)
        hed_schema = SchemaLoaderDF.load_spreadsheet(filenames=hed_path, name=name)
    else:
        raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=hed_path)

    if schema_namespace:
        hed_schema.set_schema_prefix(schema_namespace=schema_namespace)

    return hed_schema

load_schema_version ¶

load_schema_version(
    xml_version=None, xml_folder=None
) -> Union["HedSchema", "HedSchemaGroup"]

Return a HedSchema or HedSchemaGroup extracted from xml_version

Parameters:

Name	Type	Description	Default
`xml_version`	`str or list`	List or str specifying which official HED schemas to use. A json str format is also supported, based on the output of HedSchema.get_formatted_version Basic format: `[schema_namespace:][library_name_]X.Y.Z`.	`None`
`xml_folder`	`str`	Path to a folder containing schema.	`None`

Returns:

Type	Description
`Union['HedSchema', 'HedSchemaGroup']`	HedSchema or HedSchemaGroup: The schema or schema group extracted.

:raises HedFileError: - The xml_version is not valid. - The specified version cannot be found or loaded - Other fatal errors loading the schema (These are unlikely if you are not editing them locally) - The prefix is invalid

Source code in hed/schema/hed_schema_io.py

def load_schema_version(xml_version=None, xml_folder=None) -> Union['HedSchema', 'HedSchemaGroup']:
    """ Load the schema, or group of schemas, described by xml_version.

    Parameters:
        xml_version (str or list): List or str specifying which official HED schemas to use.
                                   A str that starts and ends with square brackets or with double quotes
                                   is first decoded as json (the format produced by get_formatted_version).
                                   Basic format: `[schema_namespace:][library_name_]X.Y.Z`.
        xml_folder (str): Path to a folder containing schema.

    Returns:
        HedSchema or HedSchemaGroup: A single schema when only one namespace is requested,
                                     otherwise a group holding one schema per namespace.

    :raises HedFileError:
        - The xml_version looks like json but cannot be parsed.
        - The xml_version is not valid.
        - The specified version cannot be found or loaded
        - Other fatal errors loading the schema (These are unlikely if you are not editing them locally)
        - The prefix is invalid
    """
    # Only strings wrapped in [...] or "..." are candidates for json decoding.
    json_delimiters = {('[', ']'), ('"', '"')}
    if xml_version and isinstance(xml_version, str):
        if (xml_version[0], xml_version[-1]) in json_delimiters:
            try:
                xml_version = json.loads(xml_version)
            except json.decoder.JSONDecodeError as e:
                raise HedFileError(HedExceptions.CANNOT_PARSE_JSON, str(e), xml_version) from e

    # Anything that is not a non-empty list is handed to the single-schema loader unchanged.
    if not (xml_version and isinstance(xml_version, list)):
        return _load_schema_version(xml_version=xml_version, xml_folder=xml_folder)

    versions_by_namespace = parse_version_list(xml_version)
    loaded = []
    for version in versions_by_namespace.values():
        loaded.append(_load_schema_version(xml_version=version, xml_folder=xml_folder))

    if len(loaded) == 1:
        return loaded[0]

    group_name = ",".join([schema.version for schema in loaded])
    return HedSchemaGroup(loaded, name=group_name)

parse_version_list ¶

parse_version_list(xml_version_list) -> dict

Takes a list of xml versions and returns a dictionary split by prefix

e.g. ["score", "testlib"] will return {"": "score,testlib"}
e.g. ["score", "testlib", "ol:otherlib"] will return {"": "score,testlib", "ol": "ol:otherlib"}

Parameters:

Name	Type	Description	Default
`xml_version_list`	`list`	List of str specifying which HED schemas to use	required

Returns:

Name	Type	Description
`dict`	`dict`	A dictionary of version strings split by prefix.

Source code in hed/schema/hed_schema_io.py

def parse_version_list(xml_version_list) -> dict:
    """Takes a list of xml versions and returns a dictionary split by prefix

        e.g. ["score", "testlib"] will return {"": "score,testlib"}
        e.g. ["score", "testlib", "ol:otherlib"] will return {"": "score,testlib", "ol": "ol:otherlib"}

    Parameters:
        xml_version_list (list): List of str specifying which HED schemas to use.
                                 Each item has the form `[schema_namespace:]version`.

    Returns:
        dict: A dictionary of version strings split by prefix.  Keys are the namespaces without
              the trailing colon ("" when no namespace was given).  Values are the comma-joined
              versions for that namespace, prefixed with `namespace:` when the namespace is not empty.

    :raises HedFileError:
        - An item of the list is not a str.
        - The same version is listed twice under one namespace.
    """
    out_versions = defaultdict(list)
    for version in xml_version_list:
        # Check the type before any string operation, so a non-str item (e.g. an int)
        # is reported as a HedFileError instead of a TypeError from the ":" membership test.
        if not isinstance(version, str):
            raise HedFileError(HedExceptions.SCHEMA_VERSION_INVALID,
                               f"Must specify schema version by number, found no version on {xml_version_list} schema.",
                               filename=None)

        schema_namespace = ""
        if ":" in version:
            # Everything before the first colon is the namespace; the colon itself is dropped.
            schema_namespace, _, version = version.partition(":")

        if version in out_versions[schema_namespace]:
            raise HedFileError(HedExceptions.SCHEMA_DUPLICATE_LIBRARY,
                               f"Attempting to load same library '{version}' twice: {out_versions[schema_namespace]}",
                               filename=None)
        out_versions[schema_namespace].append(version)

    # Collapse each namespace's versions into one string, restoring the "namespace:" prefix when present.
    out_versions = {key: ",".join(value) if not key else f"{key}:" + ",".join(value) for key, value in
                    out_versions.items()}

    return out_versions

HedSchemaEntry¶

HedSchemaEntry ¶

A single node in a HedSchema.

The structure contains all the node information including attributes and properties.

Source code in hed/schema/hed_schema_entry.py

class HedSchemaEntry:
    """ One named node of a HedSchema.

        Holds the node name, its description, and a dictionary of its attributes/properties.

    """
    def __init__(self, name, section):
        """ Create an entry with no attributes and no description.

        Parameters:
            name (str): The name of the entry.
            section (HedSchemaSection):  The section to which it belongs.

        """
        self.name = name
        self.description = None
        self._section = section
        # Maps property/attribute name to its value.  The value will often be a bool.
        self.attributes = {}

        # Stays None until an attribute that is not valid in this section is set on the entry.
        self._unknown_attributes = None

    def finalize_entry(self, schema):
        """ Called once after loading to set internal state.

        Parameters:
            schema (HedSchema): The schema that holds the rules.

        """
        unknown = self._unknown_attributes
        if not unknown:
            return

        # Collect first, then remove, so the dictionary is not modified while it is being iterated.
        now_valid = [attribute for attribute in unknown if attribute in self._section.valid_attributes]
        for attribute in now_valid:
            unknown.pop(attribute)

    def has_attribute(self, attribute, return_value=False) -> Union[bool, Any]:
        """ Report whether this entry has an attribute, or fetch the attribute's value.

        Parameters:
            attribute (str): The attribute to check for.
            return_value (bool): If True, return the stored value of the attribute.
                                 If False, return a bool saying whether the attribute is present.

        Returns:
            bool or any: With return_value False, True if the attribute is present and False otherwise.
            With return_value True, the value of the attribute, or None if it is not present.

        Notes:
            - The existence of an attribute does not guarantee its validity.
        """
        if not return_value:
            return attribute in self.attributes
        return self.attributes.get(attribute, None)

    def attribute_has_property(self, attribute, property_name) -> bool:
        """ Return True if the attribute is valid in this section and has the given property.

        Parameters:
            attribute (str): Attribute name to check for property_name.
            property_name (str): The name of the property to look for on the attribute.

        Returns:
            bool: True if the section's entry for the attribute has the property, False otherwise.

        """
        attribute_entry = self._section.valid_attributes.get(attribute)
        return bool(attribute_entry and attribute_entry.has_attribute(property_name))

    def _set_attribute_value(self, attribute, attribute_value):
        """ Store a value for an attribute of this entry.

        Parameters:
            attribute (str): The name of the schema entry attribute.
            attribute_value (bool or str):  The value of the attribute.

        Notes:
            - Falsy values are ignored: nothing is stored.
            - An attribute name that is not valid in this section is also recorded as an unknown attribute.

        """
        if attribute_value:
            # todo: remove this patch and redo the code
            # This check doesn't need to be done if the schema is valid.
            if attribute not in self._section.valid_attributes:
                if self._unknown_attributes is None:
                    self._unknown_attributes = {}
                self._unknown_attributes[attribute] = attribute_value
            self.attributes[attribute] = attribute_value

    @property
    def section_key(self):
        """ The section_key of the section this entry belongs to. """
        return self._section.section_key

    def __eq__(self, other):
        # Entries are equal when name, attributes (ignoring list order) and description all agree.
        differs = (self.name != other.name
                   or not self._compare_attributes_no_order(self.attributes, other.attributes)
                   or self.description != other.description)
        return not differs

    def __hash__(self):
        return hash(self.name)

    def __str__(self):
        return self.name

    @staticmethod
    def _compare_attributes_no_order(left, right):
        """ Compare two attribute dictionaries, treating comma-separated str values as unordered sets. """
        def _unordered(attributes):
            return {name: (set(value.split(",")) if isinstance(value, str) else value)
                    for (name, value) in attributes.items()}

        # Only pay for the normalisation when the plain comparison fails.
        if left != right:
            left, right = _unordered(left), _unordered(right)

        return left == right

attribute_has_property ¶

attribute_has_property(attribute, property_name) -> bool

Return True if attribute has property.

Parameters:

Name	Type	Description	Default
`attribute`	`str`	Attribute name to check for property_name.	required
`property_name`	`str`	The name of the property to look for on the attribute.	required

Returns:

Name	Type	Description
`bool`	`bool`	Returns True if this entry has the property.

Source code in hed/schema/hed_schema_entry.py

def attribute_has_property(self, attribute, property_name) -> bool:
    """ Return True if the attribute is valid in this section and has the given property.

    Parameters:
        attribute (str): Attribute name to check for property_name.
        property_name (str): The name of the property to look for on the attribute.

    Returns:
        bool: True if the section's entry for the attribute has the property, False otherwise.

    """
    # An attribute that is not valid in this section yields None here, and therefore False.
    attribute_entry = self._section.valid_attributes.get(attribute)
    return bool(attribute_entry and attribute_entry.has_attribute(property_name))

finalize_entry ¶

finalize_entry(schema)

Called once after loading to set internal state.

Parameters:

Name	Type	Description	Default
`schema`	`HedSchema`	The schema that holds the rules.	required

Source code in hed/schema/hed_schema_entry.py

def finalize_entry(self, schema):
    """ Called once after loading to set internal state.

    Drops from the unknown attributes every name that is valid in this entry's section.

    Parameters:
        schema (HedSchema): The schema that holds the rules.

    """
    unknown = self._unknown_attributes
    if not unknown:
        return

    # Collect first, then remove, so the dictionary is not modified while it is being iterated.
    now_valid = [attribute for attribute in unknown if attribute in self._section.valid_attributes]
    for attribute in now_valid:
        unknown.pop(attribute)

has_attribute ¶

has_attribute(
    attribute, return_value=False
) -> Union[bool, Any]

Checks for the existence of an attribute in this entry.

Parameters:

Name	Type	Description	Default
`attribute`	`str`	The attribute to check for.	required
`return_value`	`bool`	If True, returns the actual value of the attribute. If False, returns a boolean indicating the presence of the attribute.	`False`

Returns:

Type	Description
`Union[bool, Any]`	bool or any: If return_value is False, returns True if the attribute exists and False otherwise.
`Union[bool, Any]`	If return_value is True, returns the value of the attribute if it exists, else returns None.

Notes

The existence of an attribute does not guarantee its validity.

Source code in hed/schema/hed_schema_entry.py

def has_attribute(self, attribute, return_value=False) -> Union[bool, Any]:
    """ Report whether this entry has an attribute, or fetch the attribute's value.

    Parameters:
        attribute (str): The attribute to check for.
        return_value (bool): If True, return the stored value of the attribute.
                             If False, return a bool saying whether the attribute is present.

    Returns:
        bool or any: With return_value False, True if the attribute is present and False otherwise.
        With return_value True, the value of the attribute, or None if it is not present.

    Notes:
        - The existence of an attribute does not guarantee its validity.
    """
    if not return_value:
        return attribute in self.attributes
    return self.attributes.get(attribute, None)

HedSchemaGroup¶

HedSchemaGroup ¶

Bases: HedSchemaBase

Container for multiple HedSchema objects.

Notes

The container class is useful when library schema are included.
You cannot save/load/etc. the combined schema object directly.

Source code in hed/schema/hed_schema_group.py

class HedSchemaGroup(HedSchemaBase):
    """ A collection of HedSchema objects, looked up by namespace.

    Notes:
        - The container class is useful when library schema are included.
        - You cannot save/load/etc. the combined schema object directly.

    """
    def __init__(self, schema_list, name=""):
        """ Build a group from a list of HedSchema objects.

        Parameters:
            schema_list (list): A list of HedSchema for the container.
            name (str): Stored as the name of the group.

        :raises HedFileError:
            - Multiple schemas have the same library prefixes.
            - Empty list passed
        """
        super().__init__()
        if len(schema_list) == 0:
            raise HedFileError(HedExceptions.BAD_PARAMETERS, "Empty list passed to HedSchemaGroup constructor.",
                               filename=self.name)

        # Each namespace may appear only once, since it is the lookup key for its schema.
        namespaces = [hed_schema._namespace for hed_schema in schema_list]
        if len(namespaces) != len(set(namespaces)):
            raise HedFileError(HedExceptions.SCHEMA_DUPLICATE_PREFIX,
                               "Multiple schema share the same tag name_prefix.  This is not allowed.",
                               filename=self.name)
        self._schemas = dict(zip(namespaces, schema_list))

        # The group only reports a source format when every member shares it.
        source_formats = [hed_schema.source_format for hed_schema in schema_list]
        if len(set(source_formats)) == 1:
            self.source_format = source_formats[0]
        else:
            self.source_format = None
        self._name = name

    def get_schema_versions(self) -> list[str]:
        """ The version string of every schema in the group.

        Returns:
            list[str]: The `version` of each contained schema, in the order the schemas are stored.
        """
        versions = []
        for schema in self._schemas.values():
            versions.append(schema.version)
        return versions

    def get_formatted_version(self) -> str:
        """ The versions of the schemas in this group as a json string.

        Returns:
            str: The json encoding of the list returned by get_schema_versions.
        """
        versions = self.get_schema_versions()
        return json.dumps(versions)

    def __eq__(self, other):
        return self._schemas == other._schemas

    def schema_for_namespace(self, namespace) -> Union[HedSchema, None]:
        """ Return the HedSchema for the library namespace.

        Parameters:
            namespace (str): A schema library name namespace.

        Returns:
            Union[HedSchema,None]: The schema stored under this namespace, or None if there is none.

        """
        return self._schemas.get(namespace)

    @property
    def valid_prefixes(self) -> list[str]:
        """ Return a list of all prefixes this group will accept.

        Returns:
            list[str]:  The namespaces of the schemas held by this group.

        """
        return list(self._schemas)

    def check_compliance(self, check_for_warnings=True, name=None, error_handler=None) -> list[dict]:
        """ Check for HED3 compliance of this schema.

        Parameters:
            check_for_warnings (bool): If True, checks for formatting issues like invalid characters, capitalization.
            name (str): If present, use as the filename for context, rather than using the actual filename.
                        Useful for temp filenames when supporting web services.
            error_handler (ErrorHandler or None): Used to report errors.  Uses a default one if none passed in.

        Returns:
            list[dict]: The issues reported by every schema in the group, concatenated in schema order.
        """
        all_issues = []
        for member in self._schemas.values():
            all_issues.extend(member.check_compliance(check_for_warnings, name, error_handler))
        return all_issues

    def get_tags_with_attribute(self, attribute, key_class=HedSectionKey.Tags) -> list:
        """ Return tag entries with the given attribute.

        Parameters:
            attribute (str): A tag attribute.  Eg HedKey.ExtensionAllowed
            key_class (HedSectionKey): The HedSectionKey for the section to retrieve from.

        Returns:
            list: The entries reported by the schemas in the group, with duplicates removed.

        Notes:
            - The combined collection is rebuilt from the individual schemas on every call.
        """
        combined = set()
        for member in self._schemas.values():
            combined.update(member.get_tags_with_attribute(attribute, key_class))
        return list(combined)

    def get_tag_entry(self, name, key_class=HedSectionKey.Tags, schema_namespace="") -> Union["HedSchemaEntry", None]:
        """ Return the schema entry for this tag, if one exists.

        Parameters:
            name (str): Any form of basic tag(or other section entry) to look up.
                This will not handle extensions or similar.
                If this is a tag, it can have a schema namespace, but it's not required
            key_class (HedSectionKey or str):  The type of entry to return.
            schema_namespace (str): Selects which schema of the group is searched.
                                    If no schema has this namespace, None is returned.

        Returns:
            HedSchemaEntry: The schema entry for the given tag.
        """
        specific_schema = self.schema_for_namespace(schema_namespace)
        if specific_schema:
            return specific_schema.get_tag_entry(name, key_class, schema_namespace)
        return None

    def find_tag_entry(self, tag, schema_namespace="") -> tuple[Union["HedTagEntry", None], Union[str, None], list]:
        """ Find the schema entry for a given source tag.

        Parameters:
            tag (str, HedTag): Any form of tag to look up.  Can have an extension, value, etc.
            schema_namespace (str): The schema namespace of the tag, if any.

        Returns:
            HedTagEntry: The located tag entry for this tag.
            str: The remainder of the tag that isn't part of the base tag.
            list: A list of errors while converting.

        Notes:
            Works left to right (which is mostly relevant for errors).
            When no schema in the group has the namespace, the first two items are None and
            the list holds a HED_LIBRARY_UNMATCHED error.
        """
        specific_schema = self.schema_for_namespace(schema_namespace)
        if specific_schema:
            return specific_schema._find_tag_entry(tag, schema_namespace)

        validation_issues = ErrorHandler.format_error(ValidationErrors.HED_LIBRARY_UNMATCHED, tag,
                                                      schema_namespace, self.valid_prefixes)
        return None, None, validation_issues

valid_prefixes `property` ¶

valid_prefixes: list[str]

Return a list of all prefixes this group will accept.

Returns:

Type	Description
`list[str]`	list[str]: A list of strings representing valid prefixes for this group.

check_compliance ¶

check_compliance(
    check_for_warnings=True, name=None, error_handler=None
) -> list[dict]

Check for HED3 compliance of this schema.

Parameters:

Name	Type	Description	Default
`check_for_warnings`	`bool`	If True, checks for formatting issues like invalid characters, capitalization.	`True`
`name`	`str`	If present, use as the filename for context, rather than using the actual filename. Useful for temp filenames when supporting web services.	`None`
`error_handler`	`ErrorHandler or None`	Used to report errors. Uses a default one if none passed in.	`None`

Returns:

Type	Description
`list[dict]`	list[dict]: A list of all warnings and errors found in the file. Each issue is a dictionary.

Source code in hed/schema/hed_schema_group.py

def check_compliance(self, check_for_warnings=True, name=None, error_handler=None) -> list[dict]:
    """ Check for HED3 compliance of this schema.

    Parameters:
        check_for_warnings (bool): If True, checks for formatting issues like invalid characters, capitalization.
        name (str): If present, use as the filename for context, rather than using the actual filename.
                    Useful for temp filenames when supporting web services.
        error_handler (ErrorHandler or None): Used to report errors.  Uses a default one if none passed in.

    Returns:
        list[dict]: The issues reported by every schema in the group, concatenated in schema order.
    """
    all_issues = []
    for member in self._schemas.values():
        # Every member receives the same three arguments, positionally.
        all_issues.extend(member.check_compliance(check_for_warnings, name, error_handler))
    return all_issues

find_tag_entry ¶

find_tag_entry(
    tag, schema_namespace=""
) -> tuple[
    Union["HedTagEntry", None], Union[str, None], list
]

Find the schema entry for a given source tag.

Parameters:

Name	Type	Description	Default
`tag`	`(str, HedTag)`	Any form of tag to look up. Can have an extension, value, etc.	required
`schema_namespace`	`str`	The schema namespace of the tag, if any.	`''`

Returns:

Name	Type	Description
`HedTagEntry`	`Union['HedTagEntry', None]`	The located tag entry for this tag.
`str`	`Union[str, None]`	The remainder of the tag that isn't part of the base tag.
`list`	`list`	A list of errors while converting.

Notes

Works left to right (which is mostly relevant for errors).

Source code in hed/schema/hed_schema_group.py

def find_tag_entry(self, tag, schema_namespace="") -> tuple[Union["HedTagEntry", None], Union[str, None], list]:
    """ Find the schema entry for a given source tag.

    Parameters:
        tag (str, HedTag): Any form of tag to look up.  Can have an extension, value, etc.
        schema_namespace (str): The schema namespace of the tag, if any.

    Returns:
        HedTagEntry: The located tag entry for this tag.
        str: The remainder of the tag that isn't part of the base tag.
        list: A list of errors while converting.

    Notes:
        Works left to right (which is mostly relevant for errors).
        When no schema in the group has the namespace, the first two items are None and
        the list holds a HED_LIBRARY_UNMATCHED error.
    """
    specific_schema = self.schema_for_namespace(schema_namespace)
    if specific_schema:
        # The lookup itself is delegated to the schema that owns the namespace.
        return specific_schema._find_tag_entry(tag, schema_namespace)

    validation_issues = ErrorHandler.format_error(ValidationErrors.HED_LIBRARY_UNMATCHED, tag,
                                                  schema_namespace, self.valid_prefixes)
    return None, None, validation_issues

get_formatted_version ¶

get_formatted_version() -> str

The HED version string including namespace and library name if any of this schema.

Returns:

Name	Type	Description
`str`	`str`	The complete version of this schema including library name and namespace.

Source code in hed/schema/hed_schema_group.py

def get_formatted_version(self) -> str:
    """ The versions of the schemas in this group as a json string.

    Returns:
        str: The json encoding of the list returned by get_schema_versions.
    """
    versions = self.get_schema_versions()
    return json.dumps(versions)

get_schema_versions ¶

get_schema_versions() -> list[str]

A list of HED version strings including namespace and library name if any for these schemas.

Returns:

Type	Description
`list[str]`	list[str]: The complete version of this schema including library name and namespace.

Source code in hed/schema/hed_schema_group.py

def get_schema_versions(self) -> list[str]:
    """ The version string of every schema in the group.

    Returns:
        list[str]: The `version` of each contained schema, in the order the schemas are stored.
    """
    versions = []
    for schema in self._schemas.values():
        versions.append(schema.version)
    return versions

get_tag_entry ¶

get_tag_entry(
    name, key_class=HedSectionKey.Tags, schema_namespace=""
) -> Union["HedSchemaEntry", None]

Return the schema entry for this tag, if one exists.

Parameters:

Name	Type	Description	Default
`name`	`str`	Any form of basic tag(or other section entry) to look up. This will not handle extensions or similar. If this is a tag, it can have a schema namespace, but it's not required	required
`key_class`	`HedSectionKey or str`	The type of entry to return.	`Tags`
`schema_namespace`	`str`	Only used on Tags. If incorrect, will return None.	`''`

Returns:

Name	Type	Description
`HedSchemaEntry`	`Union['HedSchemaEntry', None]`	The schema entry for the given tag.

Source code in hed/schema/hed_schema_group.py

def get_tag_entry(self, name, key_class=HedSectionKey.Tags, schema_namespace="") -> Union["HedSchemaEntry", None]:
    """ Return the schema entry for this tag, if one exists.

    Parameters:
        name (str): Any form of basic tag(or other section entry) to look up.
            This will not handle extensions or similar.
            If this is a tag, it can have a schema namespace, but it's not required
        key_class (HedSectionKey or str):  The type of entry to return.
        schema_namespace (str): Selects which schema of the group is searched.
                                If no schema has this namespace, None is returned.

    Returns:
        HedSchemaEntry: The schema entry for the given tag.
    """
    specific_schema = self.schema_for_namespace(schema_namespace)
    if specific_schema:
        # The lookup itself is delegated to the schema that owns the namespace.
        return specific_schema.get_tag_entry(name, key_class, schema_namespace)
    return None

get_tags_with_attribute ¶

get_tags_with_attribute(
    attribute, key_class=HedSectionKey.Tags
) -> list

Return tag entries with the given attribute.

Parameters:

Name	Type	Description	Default
`attribute`	`str`	A tag attribute. Eg HedKey.ExtensionAllowed	required
`key_class`	`HedSectionKey`	The HedSectionKey for the section to retrieve from.	`Tags`

Returns:

Name	Type	Description
`list`	`list`	A list of all tags with this attribute.

Notes

The result is cached so will be fast after first call.

Source code in hed/schema/hed_schema_group.py

def get_tags_with_attribute(self, attribute, key_class=HedSectionKey.Tags) -> list:
    """ Return tag entries with the given attribute.

    Parameters:
        attribute (str): A tag attribute.  Eg HedKey.ExtensionAllowed
        key_class (HedSectionKey): The HedSectionKey for the section to retrieve from.

    Returns:
        list: A list of all tags with this attribute, with duplicates removed.  Entries appear in the
              order they are first reported, schema by schema.

    Notes:
        - The combined list is rebuilt on every call; any caching happens (if at all) inside the
          per-schema get_tags_with_attribute calls.
    """
    # A dict is used as an insertion-ordered set.  Building a plain set and converting it to a list
    # makes the order depend on the entries' hashes, which can differ from one interpreter run to the next.
    unique_tags = {}
    for schema in self._schemas.values():
        for entry in schema.get_tags_with_attribute(attribute, key_class):
            unique_tags.setdefault(entry, None)
    return list(unique_tags)

schema_for_namespace ¶

schema_for_namespace(namespace) -> Union[HedSchema, None]

Return the HedSchema for the library namespace.

Parameters:

Name	Type	Description	Default
`namespace`	`str`	A schema library name namespace.	required

Returns:

Type	Description
`Union[HedSchema, None]`	Union[HedSchema,None]: The specific schema for this library name namespace if exists.

Source code in hed/schema/hed_schema_group.py

def schema_for_namespace(self, namespace) -> Union[HedSchema, None]:
    """ Return the HedSchema for the library namespace.

    Parameters:
        namespace (str): A schema library name namespace.

    Returns:
        Union[HedSchema,None]: The schema stored under this namespace, or None if there is none.

    """
    return self._schemas.get(namespace)

Schema Comparison¶

schema_comparer ¶

Functions supporting comparison of schemas.

SchemaComparer ¶

Class for comparing HED schemas and generating change logs.

Source code in hed/schema/schema_comparer.py

class SchemaComparer:
    """Class for comparing HED schemas and generating change logs.

    ``schema1`` is treated as the first (baseline) schema and ``schema2`` as the second:
    entries found only in ``schema1`` are reported as deleted, entries found only in
    ``schema2`` are reported as added.

    Change logs are dictionaries mapping a section key to a list of change records.
    Each record is a dict with the keys ``'change_type'`` ('Major', 'Minor', 'Patch' or
    'Unknown'), ``'change'`` (a human readable message) and ``'tag'`` (the affected name).
    """

    # Class-level constants
    MISC_SECTION = "misc"
    HED_ID_SECTION = "HedId changes"
    EXTRAS_SECTION = "Extras changes"
    SOURCES = "Sources"
    PREFIXES = "Prefixes"
    ANNOTATION_PROPERTY_EXTERNAL = "AnnotationPropertyExternal"

    # Singular display names. The key order here is also the order in which sections
    # are returned by gather_schema_changes.
    SECTION_ENTRY_NAMES = {
        HedSectionKey.Tags: "Tag",
        HedSectionKey.Units: "Unit",
        HedSectionKey.UnitClasses: "Unit Class",
        HedSectionKey.ValueClasses: "Value Class",
        HedSectionKey.UnitModifiers: "Unit Modifier",
        HedSectionKey.Properties: "Property",
        HedSectionKey.Attributes: "Attribute",
        MISC_SECTION: "Misc Metadata",
        HED_ID_SECTION: "Modified Hed Ids",
        SOURCES: "Sources",
        PREFIXES: "Prefixes",
        ANNOTATION_PROPERTY_EXTERNAL: "AnnotationPropertyExternal",
    }

    # Plural display names used for section headings in printed output.
    SECTION_ENTRY_NAMES_PLURAL = {
        HedSectionKey.Tags: "Tags",
        HedSectionKey.Units: "Units",
        HedSectionKey.UnitClasses: "Unit Classes",
        HedSectionKey.ValueClasses: "Value Classes",
        HedSectionKey.UnitModifiers: "Unit Modifiers",
        HedSectionKey.Properties: "Properties",
        HedSectionKey.Attributes: "Attributes",
        MISC_SECTION: "Misc Metadata",
        HED_ID_SECTION: "Modified Hed Ids",
        EXTRAS_SECTION: "Extras",
    }

    # TODO: Check that the cases of these are correct.
    DF_EXTRAS = {SOURCES, PREFIXES, ANNOTATION_PROPERTY_EXTERNAL}

    def __init__(self, schema1, schema2):
        """Initialize the SchemaComparer with two schemas.

        Parameters:
            schema1: The first (baseline) schema.
            schema2: The second schema, compared against schema1.
        """
        self.schema1 = schema1
        self.schema2 = schema2

    def find_matching_tags(self, sections=(HedSectionKey.Tags,), return_string=True):
        """Find the entries whose names appear in both schemas.

        Parameters:
            sections (iterable or None): The sections to compare. None compares every section.
            return_string (bool): If True return a printable summary, otherwise the raw dictionary.

        Returns:
            str or dict: A summary string of matching names by section, or a dictionary
                mapping section key to {name: (entry1, entry2)} for equal and unequal entries.
        """
        matches, _, _, unequal_entries = self.compare_schemas(sections=sections)
        header_summary = self._get_tag_name_summary((matches, unequal_entries))

        # Combine the two dictionaries
        for section_key, section_dict in matches.items():
            section_dict.update(unequal_entries[section_key])

        if return_string:
            final_string = "Nodes with matching names:\n"
            final_string += self._pretty_print_header(header_summary)
            return final_string
        return matches

    def compare_schemas(self, attribute_filter=HedKey.InLibrary, sections=(HedSectionKey.Tags,)):
        """Compare the two schemas section by section.

        Parameters:
            attribute_filter (str or None): If set, only entries having this attribute are compared.
            sections (iterable or None): The sections to compare. None compares every section
                as well as the miscellaneous metadata (header attributes, prologue, epilogue).

        Returns:
            tuple: (matches, not_in_schema1, not_in_schema2, unequal_entries). Each is a dict keyed
                by section. matches and unequal_entries map names to (entry1, entry2) pairs; the
                not_in_* dicts map names to the single entry present in the other schema.
        """
        matches, not_in_schema2, not_in_schema1, unequal_entries = {}, {}, {}, {}

        # Handle miscellaneous sections
        if sections is None or self.MISC_SECTION in sections:
            unequal_entries[self.MISC_SECTION] = {}
            header1 = self.schema1.get_save_header_attributes()
            header2 = self.schema2.get_save_header_attributes()
            if header1 != header2:
                unequal_entries[self.MISC_SECTION]['header_attributes'] = (str(header1), str(header2))
            if self.schema1.prologue != self.schema2.prologue:
                unequal_entries[self.MISC_SECTION]['prologue'] = (self.schema1.prologue, self.schema2.prologue)
            if self.schema1.epilogue != self.schema2.epilogue:
                unequal_entries[self.MISC_SECTION]['epilogue'] = (self.schema1.epilogue, self.schema2.epilogue)

        # Compare sections
        for section_key in HedSectionKey:
            if sections is not None and section_key not in sections:
                continue
            dict1, dict2 = {}, {}
            section1, section2 = self.schema1[section_key], self.schema2[section_key]
            # Tags are keyed by their short name; every other section by plain name.
            name_attribute = 'short_tag_name' if section_key == HedSectionKey.Tags else 'name'

            for entry in section1.all_entries:
                if not attribute_filter or entry.has_attribute(attribute_filter):
                    dict1[getattr(entry, name_attribute)] = entry

            for entry in section2.all_entries:
                if not attribute_filter or entry.has_attribute(attribute_filter):
                    dict2[getattr(entry, name_attribute)] = entry

            not_in_schema2[section_key] = {key: dict1[key] for key in dict1 if key not in dict2}
            not_in_schema1[section_key] = {key: dict2[key] for key in dict2 if key not in dict1}
            unequal_entries[section_key] = {key: (dict1[key], dict2[key]) for key in dict1
                                            if key in dict2 and dict1[key] != dict2[key]}
            matches[section_key] = {key: (dict1[key], dict2[key]) for key in dict1
                                    if key in dict2 and dict1[key] == dict2[key]}

        return matches, not_in_schema1, not_in_schema2, unequal_entries

    def gather_schema_changes(self, attribute_filter=None):
        """Generate a changelog by comparing the two schemas.

        Parameters:
            attribute_filter (str or None): If set, only entries having this attribute are compared.

        Returns:
            dict: Section key -> list of change records, ordered as in SECTION_ENTRY_NAMES.
                Sections whose key is not in SECTION_ENTRY_NAMES are not returned.
        """
        _, not_in_1, not_in_2, unequal_entries = self.compare_schemas(attribute_filter=attribute_filter, sections=None)
        change_dict = defaultdict(list)
        self._add_removed_items(change_dict, not_in_2)
        self._add_added_items(change_dict, not_in_1)
        self._add_unequal_entries(change_dict, unequal_entries)
        self._add_extras_changes(change_dict)
        self._sort_changes_by_severity(change_dict)
        return {key: change_dict[key] for key in self.SECTION_ENTRY_NAMES if key in change_dict}

    def pretty_print_change_dict(self, change_dict, title="Schema changes", use_markdown=True):
        """Format the change dictionary into a string.

        Parameters:
            change_dict (dict): Section key -> list of change records.
            title (str): The first line of the output.
            use_markdown (bool): If True use bold headings and " - " bullets, otherwise tabs.

        Returns:
            str: The formatted change log, or an empty string if change_dict is empty.
        """
        final_strings = []
        line_prefix = " - " if use_markdown else "\t"
        line_endings = "**" if use_markdown else ""
        if change_dict:
            final_strings.append(title)
            final_strings.append("")  # add blank line
            for section_key, section_dict in change_dict.items():
                # Fall back to the raw key for sections without a plural display name.
                name = self.SECTION_ENTRY_NAMES_PLURAL.get(section_key, section_key)
                final_strings.append(f"{line_endings}{name}:{line_endings}")
                for item in section_dict:
                    change, tag, change_type = item['change'], item['tag'], item['change_type']
                    final_strings.append(f"{line_prefix}{tag} ({change_type}): {change}")
                final_strings.append("")
        return "\n".join(final_strings)

    def compare_differences(self, attribute_filter=None, title=""):
        """Compare the tags and extras in the two schemas, reporting all differences.

        Parameters:
            attribute_filter (str or None): If set, only entries having this attribute are compared.
            title (str): Optional title. Defaults to one built from the two schema names.

        Returns:
            str: The formatted change log.
        """
        changelog = self.gather_schema_changes(attribute_filter=attribute_filter)
        if not title:
            title = f"Differences between {self.schema1.name} and {self.schema2.name}"
        return self.pretty_print_change_dict(changelog, title=title)

    # Private helper methods
    def _pretty_print_header(self, summary_dict):
        """Format a summary dictionary of tag names by section into a string.

        Parameters:
            summary_dict (dict): Section key -> list of names. Empty sections are skipped.

        Returns:
            str: One "<Section>: name1, name2" line per section, separated by blank lines.
        """
        output_string = ""
        first_entry = True
        for section_key, tag_names in summary_dict.items():
            if not tag_names:
                continue
            type_name = self.SECTION_ENTRY_NAMES_PLURAL[section_key]
            if not first_entry:
                output_string += "\n"
            output_string += f"{type_name}: "
            output_string += ", ".join(sorted(tag_names))
            output_string += "\n"
            first_entry = False
        return output_string

    @staticmethod
    def _get_tag_name_summary(tag_dicts):
        """Combine dictionaries into a summary of tag names by section.

        Parameters:
            tag_dicts (iterable of dict): Dictionaries mapping section key -> {name: value}.

        Returns:
            dict: HedSectionKey -> list of all names found for that section.
        """
        out_dict = {section_key: [] for section_key in HedSectionKey}
        for tag_dict in tag_dicts:
            for section_key, section in tag_dict.items():
                # compare_schemas also reports a "misc" pseudo-section (header/prologue/epilogue).
                # It holds no named schema entries, so skip it instead of raising KeyError.
                if section_key not in out_dict:
                    continue
                out_dict[section_key].extend(section.keys())
        return out_dict

    def _add_removed_items(self, change_dict, not_in_2):
        """Add removed items to the change dictionary.

        Removed tags are recorded as 'Major'; removals from any other section as 'Unknown'.
        """
        for section_key, section in not_in_2.items():
            for tag, _ in section.items():
                type_name = self.SECTION_ENTRY_NAMES_PLURAL[section_key]
                change_type = 'Major' if section_key == HedSectionKey.Tags else 'Unknown'
                change_dict[section_key].append(
                    {'change_type': change_type, 'change': f'Tag {tag} deleted from {type_name}', 'tag': tag}
                )

    @staticmethod
    def _add_added_items(change_dict, not_in_1):
        """Add added items to the change dictionary. Additions are recorded as 'Minor'."""
        for section_key, section in not_in_1.items():
            for tag, _ in section.items():
                change_dict[section_key].append(
                    {'change_type': 'Minor', 'change': f'Item {tag} added', 'tag': tag}
                )

    def _add_unequal_entries(self, change_dict, unequal_entries):
        """Add unequal entries to the change dictionary.

        Dispatches on the section: misc metadata, extras sections, and schema sections
        (with extra checks for unit classes and tags) are each handled separately.
        """
        for section_key, changes in unequal_entries.items():
            if section_key == self.MISC_SECTION:
                self._add_misc_section_changes(change_dict, section_key, changes)
            elif section_key in self.DF_EXTRAS:
                self._add_extras_section_changes(change_dict, section_key, changes)
            else:
                for tag, (entry1, entry2) in changes.items():
                    if section_key == HedSectionKey.UnitClasses:
                        self._add_unit_classes_changes(change_dict, section_key, entry1, entry2)
                    elif section_key == HedSectionKey.Tags:
                        self._add_tag_changes(change_dict, section_key, entry1, entry2)
                    self._check_other_attributes(change_dict, section_key, entry1, entry2)
                    if entry1.description != entry2.description:
                        change_dict[section_key].append(
                            {'change_type': 'Patch', 'change': f'Description of {tag} modified', 'tag': tag})

    @staticmethod
    def _add_misc_section_changes(change_dict, section_key, changes):
        """Add changes for the misc section to the change dictionary.

        All misc changes are recorded as 'Patch'. Prologue and epilogue changes are reported
        without their (potentially long) old and new values.
        """
        for misc_section, (value1, value2) in changes.items():
            is_long_text = "prologue" in misc_section or "epilogue" in misc_section
            change_desc = f'{misc_section} changed' if is_long_text \
                else f'{misc_section} changed from {value1} to {value2}'
            change_dict[section_key].append({'change_type': 'Patch', 'change': change_desc, 'tag': misc_section})

    def _add_extras_section_changes(self, change_dict, section_key, changes):
        """Add changes for extras sections (dataframes) to the change dictionary."""
        pass  # Placeholder for extras section changes logic.

    @staticmethod
    def _add_unit_classes_changes(change_dict, section_key, entry1, entry2):
        """Add changes for unit classes to the change dictionary.

        A unit removed from a unit class is 'Major'; a unit added is 'Patch'.
        """
        for unit in entry1.units:
            if unit not in entry2.units:
                change_dict[section_key].append(
                    {'change_type': 'Major', 'change': f'Unit {unit} removed from {entry1.name}', 'tag': entry1.name}
                )
        for unit in entry2.units:
            if unit not in entry1.units:
                change_dict[section_key].append(
                    {'change_type': 'Patch', 'change': f'Unit {unit} added to {entry2.name}', 'tag': entry1.name}
                )

    def _add_tag_changes(self, change_dict, section_key, entry1, entry2):
        """Add changes for tags to the change dictionary.

        Covers unit class and value class membership, the position of the tag in the
        schema (long tag name), and suggested/related tags.
        """
        for unit_class in entry1.unit_classes:
            if unit_class not in entry2.unit_classes:
                change_dict[section_key].append(
                    {'change_type': 'Major', 'change': f'Unit class {unit_class} removed from {entry1.short_tag_name}',
                     'tag': entry1.short_tag_name}
                )
        for unit_class in entry2.unit_classes:
            if unit_class not in entry1.unit_classes:
                change_dict[section_key].append(
                    {'change_type': 'Patch', 'change': f'Unit class {unit_class} added to {entry2.short_tag_name}',
                     'tag': entry1.short_tag_name}
                )
        for value_class in entry1.value_classes:
            if value_class not in entry2.value_classes:
                change_dict[section_key].append(
                    {'change_type': 'Unknown', 'change': f'Value class {value_class} removed from {entry1.short_tag_name}',
                     'tag': entry1.short_tag_name}
                )
        for value_class in entry2.value_classes:
            if value_class not in entry1.value_classes:
                change_dict[section_key].append(
                    {'change_type': 'Minor', 'change': f'Value class {value_class} added to {entry2.short_tag_name}',
                     'tag': entry1.short_tag_name}
                )
        if entry1.long_tag_name != entry2.long_tag_name:
            change_dict[section_key].append(
                {'change_type': 'Minor',
                 'change': f'Tag {entry1.short_tag_name} moved in schema from {entry1.long_tag_name} to {entry2.long_tag_name}',
                 'tag': entry1.short_tag_name}
            )
        self._add_suggested_tag_changes(change_dict, entry1, entry2, HedKey.SuggestedTag, "Suggested tag")
        self._add_suggested_tag_changes(change_dict, entry1, entry2, HedKey.RelatedTag, "Related tag")

    @staticmethod
    def _add_suggested_tag_changes(change_dict, entry1, entry2, attribute, label):
        """Add changes for suggested or related tags to the change dictionary.

        The comma separated attribute values are sorted before comparison, so a pure
        reordering of the listed tags is not reported as a change.
        """
        related_tag1 = ", ".join(sorted(entry1.inherited_attributes.get(attribute, "").split(",")))
        related_tag2 = ", ".join(sorted(entry2.inherited_attributes.get(attribute, "").split(",")))
        if related_tag1 != related_tag2:
            if not related_tag1:
                related_tag1 = "empty"
            if not related_tag2:
                related_tag2 = "empty"
            change_dict[HedSectionKey.Tags].append(
                {'change_type': 'Patch',
                 'change': f'{label} changed on {entry1.short_tag_name} from {related_tag1} to {related_tag2}',
                 'tag': entry1.short_tag_name})

    def _check_other_attributes(self, change_dict, section_key, entry1, entry2):
        """Compare non-specialized attributes and add differences to the change dictionary.

        Attributes already handled elsewhere (related/suggested tags, value and unit classes)
        are excluded. HedID changes are filed under HED_ID_SECTION rather than the entry's section.
        """
        already_checked_attributes = [HedKey.RelatedTag, HedKey.SuggestedTag, HedKey.ValueClass, HedKey.UnitClass]
        unique_keys = set(entry1.attributes.keys()).union(entry2.attributes.keys())
        if section_key == HedSectionKey.Tags:
            unique_inherited_keys = set(entry1.inherited_attributes.keys()).union(entry2.inherited_attributes.keys())
        else:
            unique_inherited_keys = unique_keys
        all_unique_keys = unique_keys.union(unique_inherited_keys).difference(already_checked_attributes)

        for key in all_unique_keys:
            is_inherited = key in unique_inherited_keys
            is_direct = key in unique_keys

            # Tags are compared on their inherited attribute values; other sections on direct ones.
            if section_key == HedSectionKey.Tags:
                value1 = entry1.inherited_attributes.get(key)
                value2 = entry2.inherited_attributes.get(key)
            else:
                value1 = entry1.attributes.get(key)
                value2 = entry2.attributes.get(key)

            if value1 != value2:
                change_type = "Patch"
                start_text = f"Attribute {key} "
                # Present only through inheritance on both entries: report as an inherited change.
                if is_inherited and not is_direct:
                    change_type = "Minor"
                    start_text = f"Inherited attribute {key} "

                if value1 is True and value2 is None:
                    end_text = "removed"
                elif value1 is None and value2 is True:
                    end_text = "added"
                else:
                    end_text = f"modified from {value1} to {value2}"

                use_section_key = section_key
                if key == HedKey.HedID:
                    use_section_key = self.HED_ID_SECTION
                change_dict[use_section_key].append({
                    "change_type": change_type,
                    "change": f"{start_text}{end_text}",
                    "tag": entry1.name if section_key != HedSectionKey.Tags else entry1.short_tag_name,
                    "section": section_key
                })

    def _add_extras_changes(self, change_dict):
        """Compare the extras (dataframes) in two schemas and add differences to the change dictionary.

        Column names are lower-cased on copies of the dataframes and only the columns common
        to both are compared. Rows are matched on the key columns listed in UNIQUE_EXTRAS_KEYS,
        falling back to all common columns when no key columns are listed for a section.
        """
        from hed.schema.schema_io.df_constants import UNIQUE_EXTRAS_KEYS

        extras1 = getattr(self.schema1, "extras", {}) or {}
        extras2 = getattr(self.schema2, "extras", {}) or {}

        all_keys = set(extras1.keys()).union(extras2.keys())
        for key in all_keys:
            df1 = extras1.get(key)
            df2 = extras2.get(key)
            if df1 is None and df2 is not None:
                change_dict[key].append({'change_type': 'Minor', 'change': f'Entire {key} section missing in first schema', 'tag': key})
                continue
            if df2 is None and df1 is not None:
                change_dict[key].append({'change_type': 'Minor', 'change': f'Entire {key} section missing in second schema', 'tag': key})
                continue
            if df1 is None and df2 is None:
                continue

            # Work on copies so the schemas' own dataframes keep their original column names.
            df1 = df1.copy()
            df2 = df2.copy()
            df1.columns = [c.lower() for c in df1.columns]
            df2.columns = [c.lower() for c in df2.columns]

            key_cols = UNIQUE_EXTRAS_KEYS.get(key)
            if not key_cols:
                key_cols = list(set(df1.columns) & set(df2.columns))

            compare_cols = list(set(df1.columns) & set(df2.columns))
            if not compare_cols:
                continue

            df1 = df1[compare_cols]
            df2 = df2[compare_cols]

            diff_results = self._compare_dataframes(df1, df2, key_cols)
            for diff in diff_results:
                row_key = diff['row']
                cols = diff['cols']
                msg = diff['message']
                if msg == 'Row missing in first schema':
                    change_dict[key].append({'change_type': 'Minor', 'change': f'Row {row_key} missing in first schema', 'tag': str(row_key)})
                elif msg == 'Row missing in second schema':
                    change_dict[key].append({'change_type': 'Minor', 'change': f'Row {row_key} missing in second schema', 'tag': str(row_key)})
                elif msg == 'Duplicate keys found':
                    change_dict[key].append({'change_type': 'Unknown', 'change': f'Duplicate key {row_key} found in one or both schemas', 'tag': str(row_key)})
                elif msg == 'Column values differ':
                    col_str = ', '.join(cols) if cols else ''
                    change_dict[key].append({'change_type': 'Patch', 'change': f'Row {row_key} columns differ: {col_str}', 'tag': str(row_key)})

    @staticmethod
    def _compare_dataframes(df1, df2, key_cols):
        """Compare two dataframes by key columns and report row/column differences.

        Parameters:
            df1 (pd.DataFrame): Dataframe from the first schema.
            df2 (pd.DataFrame): Dataframe from the second schema.
            key_cols (list): Columns that together identify a row.

        Returns:
            list: Dicts with keys 'row' (the row key), 'cols' (list of differing columns or None)
                and 'message' (one of 'Row missing in first schema', 'Row missing in second schema',
                'Duplicate keys found', 'Column values differ').
        """
        def cells_differ(value1, value2):
            # NaN != NaN, so a cell that is missing in both frames must not count as a difference.
            if pd.api.types.is_scalar(value1) and pd.api.types.is_scalar(value2) \
                    and pd.isna(value1) and pd.isna(value2):
                return False
            return value1 != value2

        results = []

        df1_indexed = df1.set_index(key_cols)
        df2_indexed = df2.set_index(key_cols)

        all_keys = set(df1_indexed.index).union(df2_indexed.index)

        # Sort on the string form so the report order is reproducible between runs.
        for key in sorted(all_keys, key=str):
            if key not in df1_indexed.index:
                results.append({'row': key, 'cols': None, 'message': 'Row missing in first schema'})
            elif key not in df2_indexed.index:
                results.append({'row': key, 'cols': None, 'message': 'Row missing in second schema'})
            else:
                row1 = df1_indexed.loc[key]
                row2 = df2_indexed.loc[key]

                # .loc returns a DataFrame rather than a Series when the key is not unique.
                if isinstance(row1, pd.DataFrame) or isinstance(row2, pd.DataFrame):
                    results.append({'row': key, 'cols': None, 'message': 'Duplicate keys found'})
                    continue

                unequal_cols = [col for col in df1.columns
                                if col not in key_cols and cells_differ(row1[col], row2[col])]
                if unequal_cols:
                    results.append({'row': key, 'cols': unequal_cols, 'message': 'Column values differ'})

        return results

    @staticmethod
    def _sort_changes_by_severity(changes_dict):
        """Sort the changelist by severity.

        Each list is sorted in place in the order Major, Minor, Patch, Unknown. Unrecognized
        change types sort with Unknown.

        Parameters:
            changes_dict (dict): Dictionary mapping section keys to lists of change dicts.
        """
        order = {'Major': 1, 'Minor': 2, 'Patch': 3, 'Unknown': 4}
        for section in changes_dict.values():
            section.sort(key=lambda x: order.get(x['change_type'], order['Unknown']))

compare_differences ¶

compare_differences(attribute_filter=None, title='')

Compare the tags and extras in the two schemas, reporting all differences.

Source code in hed/schema/schema_comparer.py

def compare_differences(self, attribute_filter=None, title=""):
    """Build a printable report of every difference between the two schemas.

    The change log comes from gather_schema_changes and is rendered by
    pretty_print_change_dict. When no title is supplied, one is generated from
    the names of the two schemas.
    """
    changes = self.gather_schema_changes(attribute_filter=attribute_filter)
    heading = title or f"Differences between {self.schema1.name} and {self.schema2.name}"
    return self.pretty_print_change_dict(changes, title=heading)

compare_schemas ¶

compare_schemas(
    attribute_filter=HedKey.InLibrary,
    sections=(HedSectionKey.Tags,),
)

Compare the two schemas section by section.

Source code in hed/schema/schema_comparer.py

def compare_schemas(self, attribute_filter=HedKey.InLibrary, sections=(HedSectionKey.Tags,)):
    """Compare the two schemas one section at a time.

    Entries are keyed by short tag name for the tag section and by name elsewhere.
    When attribute_filter is set, only entries having that attribute take part.
    Passing sections=None compares every section plus the miscellaneous metadata
    (header attributes, prologue and epilogue).

    Returns a tuple (matches, not_in_schema1, not_in_schema2, unequal_entries),
    each a dictionary keyed by section.
    """
    matches = {}
    not_in_schema1 = {}
    not_in_schema2 = {}
    unequal_entries = {}

    def collect(section, name_attribute):
        # Map name -> entry for the entries that pass the attribute filter.
        return {getattr(entry, name_attribute): entry
                for entry in section.all_entries
                if not attribute_filter or entry.has_attribute(attribute_filter)}

    # Miscellaneous metadata that is not part of any schema section.
    if sections is None or self.MISC_SECTION in sections:
        misc_differences = {}
        unequal_entries[self.MISC_SECTION] = misc_differences
        if self.schema1.get_save_header_attributes() != self.schema2.get_save_header_attributes():
            misc_differences['header_attributes'] = (
                str(self.schema1.get_save_header_attributes()),
                str(self.schema2.get_save_header_attributes()),
            )
        if self.schema1.prologue != self.schema2.prologue:
            misc_differences['prologue'] = (self.schema1.prologue, self.schema2.prologue)
        if self.schema1.epilogue != self.schema2.epilogue:
            misc_differences['epilogue'] = (self.schema1.epilogue, self.schema2.epilogue)

    # The schema sections themselves.
    for section_key in HedSectionKey:
        if not (sections is None or section_key in sections):
            continue
        section1, section2 = self.schema1[section_key], self.schema2[section_key]
        if section_key == HedSectionKey.Tags:
            name_attribute = 'short_tag_name'
        else:
            name_attribute = 'name'

        entries1 = collect(section1, name_attribute)
        entries2 = collect(section2, name_attribute)
        shared_names = [name for name in entries1 if name in entries2]

        not_in_schema2[section_key] = {name: entry for name, entry in entries1.items()
                                       if name not in entries2}
        not_in_schema1[section_key] = {name: entry for name, entry in entries2.items()
                                       if name not in entries1}
        unequal_entries[section_key] = {name: (entries1[name], entries2[name]) for name in shared_names
                                        if entries1[name] != entries2[name]}
        matches[section_key] = {name: (entries1[name], entries2[name]) for name in shared_names
                                if entries1[name] == entries2[name]}

    return matches, not_in_schema1, not_in_schema2, unequal_entries

find_matching_tags ¶

find_matching_tags(
    sections=(HedSectionKey.Tags,), return_string=True
)

Compare the tags in the two schemas.

Source code in hed/schema/schema_comparer.py

def find_matching_tags(self, sections=(HedSectionKey.Tags,), return_string=True):
    """Report the entries whose names occur in both schemas.

    Names are gathered from both the equal and the unequal entries of compare_schemas.
    Returns a printable summary when return_string is True; otherwise returns the
    dictionary of equal entries with the unequal entries merged in.
    """
    same, _only_in_2, _only_in_1, differing = self.compare_schemas(sections=sections)
    name_summary = self._get_tag_name_summary((same, differing))

    # Fold the unequal entries into the equal ones, section by section.
    for section_key in same:
        same[section_key].update(differing[section_key])

    if not return_string:
        return same
    return "Nodes with matching names:\n" + self._pretty_print_header(name_summary)

gather_schema_changes ¶

gather_schema_changes(attribute_filter=None)

Generate a changelog by comparing the two schemas.

Source code in hed/schema/schema_comparer.py

def gather_schema_changes(self, attribute_filter=None):
    """Assemble the change log for the two schemas.

    Every section is compared. Removed, added and modified entries and the extras
    differences are recorded, each section's records are sorted by severity, and the
    sections are returned in the key order of SECTION_ENTRY_NAMES. Sections whose key
    is not in SECTION_ENTRY_NAMES are left out.
    """
    comparison = self.compare_schemas(attribute_filter=attribute_filter, sections=None)
    _, only_in_2, only_in_1, differing = comparison

    changes = defaultdict(list)
    self._add_removed_items(changes, only_in_1)
    self._add_added_items(changes, only_in_2)
    self._add_unequal_entries(changes, differing)
    self._add_extras_changes(changes)
    self._sort_changes_by_severity(changes)

    ordered_changes = {}
    for section_name in self.SECTION_ENTRY_NAMES:
        if section_name in changes:
            ordered_changes[section_name] = changes[section_name]
    return ordered_changes

pretty_print_change_dict ¶

pretty_print_change_dict(
    change_dict, title="Schema changes", use_markdown=True
)

Format the change dictionary into a string.

Source code in hed/schema/schema_comparer.py

def pretty_print_change_dict(self, change_dict, title="Schema changes", use_markdown=True):
    """Render a change dictionary as text.

    The output starts with the title and a blank line. Each section then gets a heading
    (its plural display name, or the raw key when none is defined), one line per change
    record, and a trailing blank line. Markdown output uses bold headings and " - "
    bullets; plain output uses tab-indented lines. An empty dictionary gives "".
    """
    if not change_dict:
        return ""

    if use_markdown:
        bullet, emphasis = " - ", "**"
    else:
        bullet, emphasis = "\t", ""

    lines = [title, ""]
    for section_key, records in change_dict.items():
        heading = self.SECTION_ENTRY_NAMES_PLURAL.get(section_key, section_key)
        lines.append(f"{emphasis}{heading}:{emphasis}")
        for record in records:
            change, tag, severity = record['change'], record['tag'], record['change_type']
            lines.append(f"{bullet}{tag} ({severity}): {change}")
        lines.append("")
    return "\n".join(lines)

HED Cache Functions¶

hed_cache ¶

Infrastructure for caching HED schema from remote repositories.

cache_local_versions ¶

cache_local_versions(cache_folder) -> int

Cache all schemas included with the HED installation.

Parameters:

Name	Type	Description	Default
`cache_folder`	`str`	The folder holding the cache.	required

Returns:

Type	Description
`int`	Returns -1 on cache access failure; returns None otherwise.

Source code in hed/schema/hed_cache.py

def cache_local_versions(cache_folder) -> Union[int, None]:
    """ Cache all schemas included with the HED installation.

    Parameters:
        cache_folder (str): The folder holding the cache.  Falls back to HED_CACHE_DIRECTORY if empty.

    Returns:
        Union[int, None]: Returns -1 on cache access failure.  None otherwise.

    """
    if not cache_folder:
        cache_folder = HED_CACHE_DIRECTORY

    try:
        with CacheLock(cache_folder, write_time=False):
            _copy_installed_folder_to_cache(cache_folder)
    except CacheException:
        return -1
    # Success is signalled by None (not 0), as documented above.
    return None

cache_xml_versions ¶

cache_xml_versions(
    hed_base_urls=DEFAULT_URL_LIST,
    hed_library_urls=DEFAULT_LIBRARY_URL_LIST,
    skip_folders=DEFAULT_SKIP_FOLDERS,
    cache_folder=None,
) -> float

Cache all schemas at the given URLs.

Parameters:

Name	Type	Description	Default
`hed_base_urls`	`str or list`	Path or list of paths. These should point to a single folder.	`DEFAULT_URL_LIST`
`hed_library_urls`	`str or list`	Path or list of paths. These should point to folder containing library folders.	`DEFAULT_LIBRARY_URL_LIST`
`skip_folders`	`list`	A list of subfolders to skip over when downloading.	`DEFAULT_SKIP_FOLDERS`
`cache_folder`	`str`	The folder holding the cache.	`None`

Returns:

Name	Type	Description
`float`	`float`	Returns -1 if cache failed for any reason, including having been cached too recently. Returns 0 if it successfully cached this time.

Notes

The Default skip_folders is 'deprecated'.
The HED cache folder defaults to HED_CACHE_DIRECTORY.
The directories on GitHub are of the form: https://api.github.com/repos/hed-standard/hed-schemas/contents/standard_schema

Source code in hed/schema/hed_cache.py

def cache_xml_versions(hed_base_urls=DEFAULT_URL_LIST, hed_library_urls=DEFAULT_LIBRARY_URL_LIST,
                       skip_folders=DEFAULT_SKIP_FOLDERS, cache_folder=None) -> float:
    """ Cache all schemas at the given URLs.

    Parameters:
        hed_base_urls (str or list): Path or list of paths.   These should point to a single folder.
        hed_library_urls (str or list): Path or list of paths.  These should point to folder containing library folders.
        skip_folders (list): A list of subfolders to skip over when downloading.
        cache_folder (str): The folder holding the cache.

    Returns:
        float: Returns -1 if cache failed for any reason, including having been cached too recently.
               Returns 0 if it successfully cached this time.

    Notes:
        - The Default skip_folders is 'deprecated'.
        - The HED cache folder defaults to HED_CACHE_DIRECTORY.
        - The directories on GitHub are of the form:
            https://api.github.com/repos/hed-standard/hed-schemas/contents/standard_schema
        - CacheException, ValueError and URLError raised while caching are all reported as -1.

    """
    # Imported locally so the except clause below can always resolve the name.
    from urllib.error import URLError

    if not cache_folder:
        cache_folder = HED_CACHE_DIRECTORY

    try:
        with CacheLock(cache_folder):
            # Accept a single URL as shorthand for a one-element list.
            if isinstance(hed_base_urls, str):
                hed_base_urls = [hed_base_urls]
            if isinstance(hed_library_urls, str):
                hed_library_urls = [hed_library_urls]
            all_hed_versions = {}
            for hed_base_url in hed_base_urls:
                new_hed_versions = _get_hed_xml_versions_one_library(hed_base_url)
                _merge_in_versions(all_hed_versions, new_hed_versions)
            for hed_library_url in hed_library_urls:
                new_hed_versions = _get_hed_xml_versions_from_url_all_libraries(hed_library_url,
                                                                                skip_folders=skip_folders)
                _merge_in_versions(all_hed_versions, new_hed_versions)

            for library_name, hed_versions in all_hed_versions.items():
                for version, version_info in hed_versions.items():
                    _cache_hed_version(version, library_name, version_info, cache_folder=cache_folder)

    # A tuple is required here: "A or B or C" evaluates to just the first class,
    # which would let ValueError and URLError escape.
    except (CacheException, ValueError, URLError):
        return -1

    return 0

get_cache_directory ¶

get_cache_directory(cache_folder=None) -> str

Return the current value of HED_CACHE_DIRECTORY.

Parameters:

Name	Type	Description	Default
`cache_folder`	`str`	Optional cache folder override.	`None`

Returns:

Name	Type	Description
`str`	`str`	The cache directory path.

Source code in hed/schema/hed_cache.py

def get_cache_directory(cache_folder=None) -> str:
    """ Return the cache directory to use.

    Parameters:
        cache_folder (str): Optional cache folder override.

    Returns:
        str: The override if one was given, otherwise the current value of HED_CACHE_DIRECTORY.
    """
    return cache_folder or HED_CACHE_DIRECTORY

get_hed_version_path ¶

get_hed_version_path(
    xml_version,
    library_name=None,
    local_hed_directory=None,
    check_prerelease=False,
)

Get HED XML file path in a directory. Only returns filenames that exist.

Parameters:

Name	Type	Description	Default
`library_name`	`str or None`	Optional schema library name.	`None`
`xml_version`	`str`	Returns this version if it exists	required
`local_hed_directory`	`str`	Path to local HED directory. Defaults to HED_CACHE_DIRECTORY	`None`
`check_prerelease`	`bool`	Also check for prerelease schemas	`False`

Returns: str: The path to the requested HED version in the HED directory, or None if that version is not found.

Source code in hed/schema/hed_cache.py

def get_hed_version_path(xml_version, library_name=None, local_hed_directory=None, check_prerelease=False):
    """ Look up the XML file path for one specific HED version in a directory.

    Only versions reported by get_hed_versions for the directory are returned.

    Parameters:
        xml_version (str): The version to look up.
        library_name (str or None): The optional schema library name.
        local_hed_directory (str): Path to local HED directory.  Defaults to HED_CACHE_DIRECTORY.
        check_prerelease (bool): Also check for prerelease schemas.

    Returns:
        str or None: The filename built for the requested version, or None when no version was requested
                     or the version is not among those found.

    """
    search_directory = local_hed_directory or HED_CACHE_DIRECTORY
    known_versions = get_hed_versions(search_directory, library_name, check_prerelease)

    # Nothing found, nothing requested, or the requested version is absent: there is no path to report.
    if not (known_versions and xml_version and xml_version in known_versions):
        return None
    return _create_xml_filename(xml_version, library_name, search_directory, check_prerelease)

get_hed_versions ¶

get_hed_versions(
    local_hed_directory=None,
    library_name=None,
    check_prerelease=False,
) -> Union[list, dict]

Get the HED versions in the HED directory.

Parameters:

Name	Type	Description	Default
`local_hed_directory`	`str`	Directory to check for versions which defaults to hed_cache.	`None`
`library_name`	`str or None`	An optional schema library name. None retrieves the standard schema only. Pass "all" to retrieve all standard and library schemas as a dict.	`None`
`check_prerelease`	`bool`	If True, results can include prerelease schemas	`False`

Returns:

Type	Description
`Union[list, dict]`	Union[list, dict]: List of version numbers or dictionary {library_name: [versions]}.

Source code in hed/schema/hed_cache.py

def get_hed_versions(local_hed_directory=None, library_name=None, check_prerelease=False) -> Union[list, dict]:
    """ Collect the HED schema versions present in a HED directory.

    If the searched directories contain no files at all, cache_local_versions is called on
    the directory and the listing is retried once.

    Parameters:
        local_hed_directory (str): Directory to check for versions which defaults to hed_cache.
        library_name (str or None): An optional schema library name.
                                    None retrieves the standard schema only.
                                    Pass "all" to retrieve all standard and library schemas as a dict.
        check_prerelease (bool): If True, results can include prerelease schemas

    Returns:
        Union[list, dict]: Sorted list of version numbers for the requested library when it was found;
                           otherwise the dictionary {library_name: [versions]} (empty if nothing matched).

    """
    def _list_files(directories):
        # Gather the entries of every directory that exists; missing ones are skipped.
        entries = []
        for directory in directories:
            try:
                entries += os.listdir(directory)
            except FileNotFoundError:
                continue
        return entries

    local_hed_directory = local_hed_directory or HED_CACHE_DIRECTORY
    # Normalize any falsy library name (e.g. "") to None, the key used for the standard schema.
    library_name = library_name or None

    search_dirs = [local_hed_directory]
    if check_prerelease and not local_hed_directory.endswith(prerelease_suffix):
        search_dirs.append(os.path.join(local_hed_directory, "prerelease"))

    file_names = _list_files(search_dirs)
    if not file_names:
        # Nothing there yet: populate the directory, then look again.
        cache_local_versions(local_hed_directory)
        file_names = _list_files(search_dirs)

    versions_by_library = {}
    for file_name in file_names:
        matched = version_pattern.match(file_name)
        if matched is None:
            continue
        file_library = matched.group(2)
        if library_name != "all" and file_library != library_name:
            continue
        versions_by_library.setdefault(file_library, []).append(matched.group(3))

    versions_by_library = {name: _sort_version_list(found) for name, found in versions_by_library.items()}

    # A single requested library yields its list; "all" (or a library with no hits) yields the dict.
    return versions_by_library.get(library_name, versions_by_library)

get_library_data `cached` ¶

get_library_data(library_name, cache_folder=None) -> dict

Retrieve the library data for the given library.

Currently, this is just the valid ID range.

Parameters:

Name	Type	Description	Default
`library_name`	`str`	The schema name. "" for standard schema.	required
`cache_folder`	`str`	The cache folder to use if not using the default.	`None`

Returns:

Name	Type	Description
`dict`	`dict`	The data for a specific library.

Source code in hed/schema/hed_cache.py

@functools.lru_cache(maxsize=50)
def get_library_data(library_name, cache_folder=None) -> dict:
    """Look up the stored data for one schema library.

       Currently, this is just the valid ID range.

       The lookup is attempted up to three times, stopping at the first success:
       reading the library_data.json already in the cache folder, then after calling
       _copy_installed_folder_to_cache, then after calling _cache_specific_url with
       LIBRARY_DATA_URL.  Results (including the empty fallback) are memoized by lru_cache.

       Parameters:
           library_name (str): The schema name.  "" for standard schema.
           cache_folder (str): The cache folder to use if not using the default.

       Returns:
           dict: The data for a specific library, or an empty dict if every attempt failed.
    """
    if cache_folder is None:
        cache_folder = HED_CACHE_DIRECTORY

    data_folder = os.path.join(cache_folder, "library_data")
    data_file = os.path.join(data_folder, "library_data.json")

    def _read_entry():
        # Parse the JSON file and pick out the requested library's entry.
        with open(data_file) as handle:
            return json.load(handle)[library_name]

    # Attempt 1: whatever is already in the cache folder.
    try:
        return _read_entry()
    except (OSError, CacheException, ValueError, KeyError):
        pass

    # Attempt 2: refresh the cache folder from the installed copy, then re-read.
    try:
        with CacheLock(data_folder, write_time=False):
            _copy_installed_folder_to_cache(data_folder, "library_data")
        return _read_entry()
    except (OSError, CacheException, ValueError, KeyError):
        pass

    # Attempt 3: fetch from LIBRARY_DATA_URL; if the fetch fails, the re-read fails too.
    try:
        with CacheLock(data_folder):
            _cache_specific_url(LIBRARY_DATA_URL, data_file)
        return _read_entry()
    except (OSError, CacheException, ValueError, URLError, KeyError):
        pass

    # This failed to get any data for some reason
    return {}

set_cache_directory ¶

set_cache_directory(new_cache_dir)

Set default global HED cache directory.

Parameters:

Name	Type	Description	Default
`new_cache_dir`	`str`	Directory to check for versions.	required

Source code in hed/schema/hed_cache.py

def set_cache_directory(new_cache_dir):
    """ Point the module-level default HED cache at a new directory.

    A falsy value (None or "") is ignored.  Otherwise HED_CACHE_DIRECTORY is
    reassigned and the directory is created if it does not exist yet.

    Parameters:
        new_cache_dir (str): The directory to use as the default cache.

    """
    global HED_CACHE_DIRECTORY
    if not new_cache_dir:
        return
    HED_CACHE_DIRECTORY = new_cache_dir
    os.makedirs(new_cache_dir, exist_ok=True)

Schema Validation Utilities¶

schema_validation_util ¶

Utilities used in HED validation/loading using a HED schema.

get_allowed_characters ¶

get_allowed_characters(value_classes) -> set[str]

Returns the allowed characters in a given container of value classes

Parameters:

Name	Type	Description	Default
`value_classes`	`list of HedSchemaEntry`	A list of schema entries that should have the allowedCharacter attribute	required

Returns:

Type	Description
`set[str]`	set[str]: The set of all characters from the given classes

Source code in hed/schema/schema_validation_util.py

def get_allowed_characters(value_classes) -> set[str]:
    """Build the set of characters permitted by a collection of value classes.

    Parameters:
        value_classes (list of HedSchemaEntry): A list of schema entries that should have the allowedCharacter attribute

    Returns:
        set[str]: The union of the characters from every listed class, plus "#" and "/".
    """
    # This could be pre-computed
    # Each entry's allowedCharacter attribute is a comma-separated list of character set names.
    set_names = [
        set_name
        for entry in value_classes
        for set_name in entry.attributes.get(constants.HedKey.AllowedCharacter, "").split(",")
    ]

    allowed = get_allowed_characters_by_name(set_names)
    # for now, just always allow these special cases(it's validated extensively elsewhere)
    allowed.update("#/")
    return allowed

get_allowed_characters_by_name ¶

get_allowed_characters_by_name(
    character_set_names,
) -> set[str]

Returns the allowed characters from a list of character set names

Note: "nonascii" is a special case "character" that can be included as well

Parameters:

Name	Type	Description	Default
`character_set_names`	`list of str`	A list of character sets to allow. See hed_schema_constants.character_types	required

Returns:

Type	Description
`set[str]`	set[str]: The set of all characters from the names

Source code in hed/schema/schema_validation_util.py

def get_allowed_characters_by_name(character_set_names) -> set[str]:
    """Expand a list of character set names into the characters they allow.

    Note: "nonascii" is a special case "character" that can be included as well

    Parameters:
        character_set_names (list of str): A list of character sets to allow.  See hed_schema_constants.character_types

    Returns:
        set[str]: The set of all characters from the names.  Any name that is not a key of
                  character_types (and "nonascii" itself) is added to the set verbatim.
    """
    allowed = set()
    for set_name in character_set_names:
        if set_name == "nonascii" or set_name not in character_types:
            # Kept as a literal entry rather than expanded.
            allowed.add(set_name)
        else:
            allowed.update(character_types[set_name])
    return allowed

get_problem_indexes ¶

get_problem_indexes(
    validation_string, character_set, index_adj=0
) -> list[tuple[str, int]]

Finds indexes with values not in character set

Parameters:

Name	Type	Description	Default
`validation_string`	`str`	The string to check characters in.	required
`character_set`	`set`	The list of valid characters (or the value "nonascii" as a set entry).	required
`index_adj`	`int`	The value to adjust the reported indices by, if this isn't the start of a string.	`0`

Returns:

Type	Description
`list[tuple[str, int]]`	list[tuple[str, int]]: The list of problematic characters and their indices.

Source code in hed/schema/schema_validation_util.py

def get_problem_indexes(validation_string, character_set, index_adj=0) -> list[tuple[str, int]]:
    """Locate the characters of a string that are not in the allowed character set.

    Parameters:
        validation_string (str): The string to check characters in.
        character_set (set): The set of valid characters (or the value "nonascii" as a set entry).
        index_adj (int): The value to adjust the reported indices by, if this isn't the start of a string.

    Returns:
        list[tuple[str, int]]: The list of problematic characters and their (adjusted) indices.
                               Always empty when character_set is empty.
    """
    if not character_set:
        return []

    # When "nonascii" is listed, any code point above 127 is accepted.
    non_ascii_allowed = "nonascii" in character_set

    problems = []
    for position, symbol in enumerate(validation_string):
        if symbol in character_set:
            continue
        if non_ascii_allowed and ord(symbol) > 127:
            continue
        problems.append((symbol, position + index_adj))
    return problems

schema_version_for_library ¶

schema_version_for_library(
    hed_schema, library_name
) -> Union[str, None]

Given the library name and HED schema object, return the version

Parameters:

Name	Type	Description	Default
`hed_schema`	`HedSchema`	the schema object	required
`library_name`	`str or None`	The library name you're interested in. "" for the standard schema.	required

Returns:

Type	Description
`Union[str, None]`	Union[str, None]: The version number of the given library name. Returns None if unknown library_name.

Source code in hed/schema/schema_validation_util.py

def schema_version_for_library(hed_schema, library_name) -> Union[str, None]:
    """ Find the version recorded in a HED schema for a given library name.

    The schema's comma-separated library and version_number values are paired up
    positionally and the first pair whose name matches is used.

    Parameters:
        hed_schema (HedSchema): the schema object
        library_name (str or None): The library name you're interested in.  "" for the standard schema.

    Returns:
        Union[str, None]: The version number of the given library name.  Returns None if unknown library_name.
    """
    wanted = "" if library_name is None else library_name

    name_version_pairs = zip(hed_schema.library.split(","), hed_schema.version_number.split(","))
    found = next((version for name, version in name_version_pairs if name == wanted), None)
    if found is not None:
        return found

    # Return the partnered schema version
    if wanted == "" and hed_schema.with_standard:
        return hed_schema.with_standard
    return None

validate_schema_description_new ¶

validate_schema_description_new(hed_entry) -> list[dict]

Check the description of the entry for invalid character issues

Parameters:

Name	Type	Description	Default
`hed_entry`	`HedSchemaEntry`	A single schema entry	required

Returns:

Type	Description
`list[dict]`	list[dict]: A list of issues pertaining to all invalid characters found in description. Each issue is a dictionary.

Source code in hed/schema/schema_validation_util.py

def validate_schema_description_new(hed_entry) -> list[dict]:
    """ Report every invalid character found in a schema entry's description.

    Descriptions are checked against the "text" and "comma" character sets.

    Parameters:
        hed_entry (HedSchemaEntry): A single schema entry

    Returns:
        list[dict]: A list of issues pertaining to all invalid characters found in description.
                    Each issue is a dictionary.  Empty when the entry has no description.
    """
    description = hed_entry.description
    if not description:
        return []

    allowed = get_allowed_characters_by_name(["text", "comma"])
    problems = get_problem_indexes(description, allowed)

    # Kludge, just get short name here if we have it for error reporting
    reported_name = getattr(hed_entry, "short_tag_name", hed_entry.name)

    issues = []
    for bad_char, position in problems:
        issues.extend(ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC,
                                                hed_entry.description, reported_name,
                                                problem_char=bad_char, char_index=position))
    return issues

validate_schema_tag_new ¶

validate_schema_tag_new(hed_entry) -> list[dict]

Check tag entry for capitalization and illegal characters.

Parameters:

Name	Type	Description	Default
`hed_entry`	`HedTagEntry`	A single tag entry	required

Returns:

Type	Description
`list[dict]`	list[dict]: A list of all formatting issues found in the term. Each issue is a dictionary.

Source code in hed/schema/schema_validation_util.py

def validate_schema_tag_new(hed_entry) -> list[dict]:
    """ Validate a tag entry's short name for capitalization and illegal characters.

    A capitalization issue is reported when the first character is neither a digit
    nor an uppercase letter; character issues come from validate_schema_term_new.

    Parameters:
        hed_entry (HedTagEntry): A single tag entry

    Returns:
        list[dict]: A list of all formatting issues found in the term. Each issue is a dictionary.
    """
    term = hed_entry.short_tag_name
    # Any # terms will have already been validated as the previous entry.
    if term == "#":
        return []

    issues = []
    if term:
        first_char = term[0]
        if first_char and not first_char.isdigit() and not first_char.isupper():
            issues.extend(ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION,
                                                    term, char_index=0, problem_char=first_char))
    issues.extend(validate_schema_term_new(hed_entry, term))
    return issues

validate_schema_term_new ¶

validate_schema_term_new(
    hed_entry, hed_term=None
) -> list[dict]

Check the term for invalid character issues

Parameters:

Name	Type	Description	Default
`hed_entry`	`HedSchemaEntry`	A single schema entry	required
`hed_term`	`str or None`	Use instead of hed_entry.name if present.	`None`

Returns:

Type	Description
`list[dict]`	list[dict]: A list of all formatting issues found in the term. Each issue is a dictionary.

Source code in hed/schema/schema_validation_util.py

def validate_schema_term_new(hed_entry, hed_term=None) -> list[dict]:
    """ Report every invalid character found in a schema term.

    The allowed characters are the "name" character set plus whatever sets the
    entry lists in its comma-separated allowedCharacter attribute.

    Parameters:
        hed_entry (HedSchemaEntry): A single schema entry
        hed_term (str or None): Use instead of hed_entry.name if present.

    Returns:
        list[dict]: A list of all formatting issues found in the term. Each issue is a dictionary.
    """
    term = hed_term if hed_term else hed_entry.name

    # todo: potentially optimize this someday, as most values are the same
    entry_set_names = hed_entry.attributes.get("allowedCharacter", "").split(",")
    allowed = get_allowed_characters_by_name(["name"] + entry_set_names)

    issues = []
    for bad_char, position in get_problem_indexes(term, allowed):
        issues.extend(ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG,
                                                term, char_index=position, problem_char=bad_char))
    return issues

Schema¶

HedSchema¶

HedSchema ¶

attributes property ¶

library property ¶

merged property ¶

properties property ¶

schema_namespace property ¶

tags property ¶

unit_classes property ¶

unit_modifiers property ¶

units property ¶

valid_prefixes property ¶

value_classes property ¶

version property ¶

version_number property ¶

with_standard property ¶

can_save ¶

check_compliance ¶

finalize_dictionaries ¶

find_tag_entry ¶

get_as_dataframes ¶

get_as_mediawiki_string ¶

get_as_xml_string ¶

get_extras ¶

get_formatted_version ¶

get_save_header_attributes ¶

get_schema_versions ¶

get_tag_attribute_names_old ¶

get_tag_entry ¶

get_tags_with_attribute ¶

has_duplicates ¶

save_as_dataframes ¶

save_as_mediawiki ¶

save_as_xml ¶

schema_for_namespace ¶

set_schema_prefix ¶

Schema I/O¶

hed_schema_io ¶

from_dataframes ¶

from_string ¶

get_hed_xml_version ¶

load_schema ¶

load_schema_version ¶

parse_version_list ¶

HedSchemaEntry¶

HedSchemaEntry ¶

attribute_has_property ¶

finalize_entry ¶

has_attribute ¶

HedSchemaGroup¶

HedSchemaGroup ¶

valid_prefixes property ¶

check_compliance ¶

find_tag_entry ¶

get_formatted_version ¶

get_schema_versions ¶

get_tag_entry ¶

get_tags_with_attribute ¶

schema_for_namespace ¶

Schema Comparison¶

schema_comparer ¶

SchemaComparer ¶

compare_differences ¶

compare_schemas ¶

find_matching_tags ¶

gather_schema_changes ¶

pretty_print_change_dict ¶

HED Cache Functions¶

hed_cache ¶

cache_local_versions ¶

cache_xml_versions ¶

get_cache_directory ¶

get_hed_version_path ¶

get_hed_versions ¶

get_library_data cached ¶

set_cache_directory ¶

Schema Validation Utilities¶

schema_validation_util ¶

get_allowed_characters ¶

attributes `property` ¶

library `property` ¶

merged `property` ¶

properties `property` ¶

schema_namespace `property` ¶

tags `property` ¶

unit_classes `property` ¶

unit_modifiers `property` ¶

units `property` ¶

valid_prefixes `property` ¶

value_classes `property` ¶

version `property` ¶

version_number `property` ¶

with_standard `property` ¶

valid_prefixes `property` ¶

get_library_data `cached` ¶