Models¶

Core data models for working with HED data structures.

HedString¶

HedString ¶

Bases: HedGroup

A HED string with its schema and definitions.

Source code in hed/models/hed_string.py

class HedString(HedGroup):
    """ A HED string with its schema and definitions. """

    OPENING_GROUP_CHARACTER = '('
    CLOSING_GROUP_CHARACTER = ')'

    def __init__(self, hed_string, hed_schema, def_dict=None, _contents=None):
        """ Constructor for the HedString class.

        Parameters:
            hed_string (str): A HED string consisting of tags and tag groups.
            hed_schema (HedSchema): The schema to use to identify tags.
            def_dict (DefinitionDict or None): The def dict to use to identify def/def expand tags.
            _contents ([HedGroup and/or HedTag] or None): Create a HedString from this exact list of children.
                                                          Does not make a copy.
        Notes:
            - The HedString object parses its component tags and groups into a tree-like structure.
            - If the string cannot be split into groups (ValueError from split_into_groups),
              the object is still created, but with no children.

        """

        if _contents is not None:
            contents = _contents
        else:
            try:
                contents = self.split_into_groups(hed_string, hed_schema, def_dict)
            except ValueError:
                # Deliberate best effort: a malformed string yields an empty tree rather than an exception.
                contents = []
        super().__init__(hed_string, contents=contents, startpos=0, endpos=len(hed_string))
        self._schema = hed_schema
        # Only set by from_hed_strings: the HedString objects this one was combined from.
        self._from_strings = None
        self._def_dict = def_dict

    @classmethod
    def from_hed_strings(cls, hed_strings):
        """ Factory for creating HedStrings via combination.

        Parameters:
            hed_strings (list or None): A list of HedString objects to combine.
                                        This takes ownership of their children.

        Returns:
            HedString: The newly combined HedString. Its text is the comma-joined text of the inputs,
                       and its schema and definitions are taken from the first input.

        :raises TypeError:
            - hed_strings is empty or None.
        """
        if not hed_strings:
            raise TypeError("Passed an empty list to from_hed_strings")
        new_string = HedString.__new__(HedString)
        hed_string = ",".join([group._hed_string for group in hed_strings])
        contents = [child for sub_string in hed_strings for child in sub_string.children]
        first_schema = hed_strings[0]._schema
        first_dict = hed_strings[0]._def_dict
        # _contents is passed so that the existing children are reused instead of re-parsing the text.
        new_string.__init__(hed_string=hed_string, _contents=contents, hed_schema=first_schema, def_dict=first_dict)
        new_string._from_strings = hed_strings
        return new_string

    @property
    def is_group(self):
        """ Always False since the underlying string is not a group with parentheses. """
        return False

    def _calculate_to_canonical_forms(self, hed_schema):
        """ Identify all tags using the given schema.

        Parameters:
            hed_schema (HedSchema, HedSchemaGroup): The schema to use to validate/convert tags.

        Returns:
            list: A list of issues found while converting the string. Each issue is a dictionary.

        """
        validation_issues = []
        for tag in self.get_all_tags():
            validation_issues += tag._calculate_to_canonical_forms(hed_schema)

        return validation_issues

    def __deepcopy__(self, memo):
        """ Deep copy hook used by copy.deepcopy.

        Parameters:
            memo (dict): The memo dictionary supplied by copy.deepcopy, keyed by object id.

        Returns:
            HedString: The copy. Only the children, the original children and the source strings
                       are deep copied; every other attribute (e.g. the schema) is shared.
        """
        # check if the object has already been copied
        if id(self) in memo:
            return memo[id(self)]

        # create a new instance of HedString class, and direct copy all parameters
        new_string = self.__class__.__new__(self.__class__)
        new_string.__dict__.update(self.__dict__)

        # add the new object to the memo dictionary
        memo[id(self)] = new_string

        # Deep copy the attributes that need it(most notably, we don't copy schema/schema entry)
        new_string._original_children = copy.deepcopy(self._original_children, memo)
        new_string._from_strings = copy.deepcopy(self._from_strings, memo)
        new_string.children = copy.deepcopy(self.children, memo)

        return new_string

    def copy(self) -> 'HedString':
        """ Return a deep copy of this string.

        Returns:
            HedString: The copied string.

        """
        return copy.deepcopy(self)

    def remove_definitions(self):
        """ Remove definition tags and groups from this string.

            This does not validate definitions and will blindly remove invalid ones as well.
        """
        definition_groups = self.find_top_level_tags({DefTagNames.DEFINITION_KEY}, include_groups=1)
        if definition_groups:
            self.remove(definition_groups)

    def shrink_defs(self) -> 'HedString':
        """ Replace def-expand tags with def tags.

            This does not validate them and will blindly shrink invalid ones as well.

        Returns:
            HedString: self
        """
        for def_expand_tag, def_expand_group in self.find_tags({DefTagNames.DEF_EXPAND_KEY}, recursive=True):
            expanded_parent = def_expand_group._parent
            if expanded_parent:
                # The tag takes the place of its enclosing group in the group's parent.
                def_expand_tag.short_base_tag = DefTagNames.DEF_KEY
                def_expand_tag._parent = expanded_parent
                expanded_parent.replace(def_expand_group, def_expand_tag)

        return self

    def expand_defs(self) -> "HedString":
        """ Replace def tags with def-expand tags.

            This does very minimal validation.

        Returns:
            HedString: self
        """
        def_tags = self.find_def_tags(recursive=True, include_groups=0)

        # Collect every replacement before performing any of them.
        replacements = []
        for tag in def_tags:
            if tag.expandable and not tag.expanded:
                replacements.append((tag, tag.expandable))

        for tag, group in replacements:
            tag_parent = tag._parent
            tag_parent.replace(tag, group)
            tag._parent = group
            tag.short_base_tag = DefTagNames.DEF_EXPAND_KEY

        return self

    def get_as_original(self) -> str:
        """ Return the original form of this string.

        Returns:
            str: The string with all the tags in their original form.

        Notes:
            Potentially with some extraneous spaces removed on returned string.
        """
        return self.get_as_form("org_tag")

    @staticmethod
    def split_into_groups(hed_string, hed_schema, def_dict=None):
        """ Split the HED string into a parse tree.

        Parameters:
            hed_string (str): A HED string consisting of tags and tag groups to be processed.
            hed_schema (HedSchema): HED schema to use to identify tags.
            def_dict (DefinitionDict): The definitions to identify.
        Returns:
            list:  A list of HedTag and/or HedGroup.

        :raises ValueError:
            - The string is significantly malformed, such as mismatched parentheses.

        Notes:
            - The parse tree consists of tag groups, tags, and delimiters.
        """
        # Stack of containers being filled: the bottom entry is the top-level list,
        # every entry above it is a HedGroup whose closing parenthesis has not been seen yet.
        current_tag_group = [[]]

        input_tags = HedString.split_hed_string(hed_string)
        for is_hed_tag, (startpos, endpos) in input_tags:
            if is_hed_tag:
                new_tag = HedTag(hed_string, hed_schema, (startpos, endpos), def_dict)
                current_tag_group[-1].append(new_tag)
                continue

            string_portion = hed_string[startpos:endpos]
            # Index of the first non-whitespace character (0 if the portion is all whitespace).
            delimiter_index = next((i for i, char in enumerate(string_portion) if not char.isspace()), 0)
            delimiter_char = string_portion[delimiter_index]

            # Compare by value: identity ('is') on strings only works by interpreter accident.
            if delimiter_char == HedString.OPENING_GROUP_CHARACTER:
                current_tag_group.append(HedGroup(hed_string, startpos + delimiter_index))
            elif delimiter_char == HedString.CLOSING_GROUP_CHARACTER:
                # Terminate existing group, and save it off.
                paren_end = startpos + delimiter_index + 1

                if len(current_tag_group) <= 1:
                    raise ValueError(f"Closing parentheses in HED string {hed_string}")

                new_group = current_tag_group.pop()
                new_group._endpos = paren_end
                current_tag_group[-1].append(new_group)

        # Comma delimiter issues are ignored and assumed already validated currently.
        if len(current_tag_group) != 1:
            raise ValueError(f"Unmatched opening parentheses in HED string {hed_string}")

        return current_tag_group[0]

    def _get_org_span(self, tag_or_group):
        """ If this tag or group was in the original HED string, find its original span.

        Parameters:
            tag_or_group (HedTag or HedGroup): The HED tag to locate in this string.

        Returns:
            int or None:   Starting position of the given item in the original string.
            int or None:   Ending position of the given item in the original string.

        Notes:
            - If the HED tag or group was not in the original string, returns (None, None).

        """
        if self._from_strings:
            return self._get_org_span_from_strings(tag_or_group)

        if self.check_if_in_original(tag_or_group):
            return tag_or_group.span

        return None, None

    def _get_org_span_from_strings(self, tag_or_group):
        """ A different case of the above, to handle if this was created from HED string objects.

        Parameters:
            tag_or_group (HedTag or HedGroup): The HED tag or group to locate.

        Returns:
            int or None:   Starting position in the combined string, or None if not found.
            int or None:   Ending position in the combined string, or None if not found.
        """
        found_string = None
        # Offset of the current source string within the comma-joined combined string.
        string_start_index = 0
        for string in self._from_strings:
            if string.check_if_in_original(tag_or_group):
                found_string = string
                break
            # Add 1 for comma
            string_start_index += string.span[1] + 1

        if not found_string:
            return None, None

        return tag_or_group.span[0] + string_start_index, tag_or_group.span[1] + string_start_index

    @staticmethod
    def split_hed_string(hed_string) -> list[tuple[bool, tuple[int, int]]]:
        """ Split a HED string into delimiters and tags.

        Parameters:
            hed_string (str): The HED string to split.

        Returns:
            list[tuple[bool, tuple[int, int]]]:  A list of tuples where each tuple is (is_hed_tag, (start_pos, end_pos)).

        Notes:
            - The tuple format is as follows
                - is_hed_tag (bool): A (possible) HED tag if True, delimiter if not.
                - start_pos (int):   Index of start of string in hed_string.
                - end_pos (int):     Index of end of string in hed_string.

            - Only the space character is treated as padding around tags.
            - This function does not validate tags or delimiters in any form.

        """
        tag_delimiters = ",()"
        current_spacing = 0      # Spaces seen since the last tag character.
        found_symbol = True      # True while scanning delimiters, False while inside a tag.
        result_positions = []
        tag_start_pos = None
        last_end_pos = 0         # Start of the delimiter run being scanned; None while inside a tag.
        for i, char in enumerate(hed_string):
            if char == " ":
                current_spacing += 1
                continue

            if char in tag_delimiters:
                if found_symbol:
                    # Consecutive delimiters: close the previous one so each gets its own entry.
                    if last_end_pos != i:
                        result_positions.append((False, (last_end_pos, i)))
                    last_end_pos = i
                else:
                    # A delimiter ends the current tag; spaces trailing the tag belong to the delimiter.
                    found_symbol = True
                    last_end_pos = i - current_spacing
                    result_positions.append((True, (tag_start_pos, last_end_pos)))
                    current_spacing = 0
                    tag_start_pos = None
                continue

            # If we have a current delimiter, end it here.
            if found_symbol and last_end_pos is not None:
                if last_end_pos != i:
                    result_positions.append((False, (last_end_pos, i)))
                last_end_pos = None

            found_symbol = False
            current_spacing = 0
            if tag_start_pos is None:
                tag_start_pos = i

        # Flush whatever was still open when the string ended.
        if last_end_pos is not None and len(hed_string) != last_end_pos:
            result_positions.append((False, (last_end_pos, len(hed_string))))
        if tag_start_pos is not None:
            result_positions.append((True, (tag_start_pos, len(hed_string) - current_spacing)))
            if current_spacing:
                result_positions.append((False, (len(hed_string) - current_spacing, len(hed_string))))

        return result_positions

    def validate(self, allow_placeholders=True, error_handler=None) -> list[dict]:
        """ Validate the string using the schema.

        Parameters:
            allow_placeholders (bool): Allow placeholders in the string.
            error_handler (ErrorHandler or None): The error handler to use, creates a default one if none passed.
        Returns:
            list[dict]: A list of issues for HED string.
        """
        from hed.validator import HedValidator

        validator = HedValidator(self._schema, def_dicts=self._def_dict)
        return validator.validate(self, allow_placeholders=allow_placeholders, error_handler=error_handler)

    def find_top_level_tags(self, anchor_tags, include_groups=2):
        """ Find top level groups with an anchor tag.

            A max of 1 tag located per top level group.

        Parameters:
            anchor_tags (container):  A list/set/etc. of short_base_tags to find groups by.
            include_groups (0, 1 or 2):  Parameter indicating what return values to include.
                If 0: return only tags.
                If 1: return only groups.
                If 2 or any other value: return both.
        Returns:
            list: The returned result depends on include_groups.
        """
        # Matching is case-insensitive on both sides.
        anchor_tags = {tag.casefold() for tag in anchor_tags}
        top_level_tags = []
        for group in self.groups():
            for tag in group.tags():
                if tag.short_base_tag.casefold() in anchor_tags:
                    top_level_tags.append((tag, group))
                    # Only capture a max of 1 per group.  These are implicitly unique.
                    break

        if include_groups in (0, 1):
            # include_groups doubles as the index into each (tag, group) pair.
            return [tag[include_groups] for tag in top_level_tags]
        return top_level_tags

    def remove_refs(self):
        """ Remove any refs(tags contained entirely inside curly braces) from the string.

            This does NOT validate the contents of the curly braces.  This is only relevant when directly
            editing sidecar strings.  Tools will naturally ignore these.
        """
        ref_tags = [tag for tag in self.get_all_tags() if tag.is_column_ref()]
        if ref_tags:
            self.remove(ref_tags)

is_group `property` ¶

is_group

Always False since the underlying string is not a group with parentheses.

copy ¶

copy() -> 'HedString'

Return a deep copy of this string.

Returns:

Name	Type	Description
`HedString`	`'HedString'`	The copied group.

Source code in hed/models/hed_string.py

def copy(self) -> 'HedString':
    """ Create an independent deep copy of this HED string.

    Returns:
        HedString: The newly created copy.

    """
    return copy.deepcopy(self)

expand_defs ¶

expand_defs() -> 'HedString'

Replace def tags with def-expand tags.

This does very minimal validation.

Returns:

Name	Type	Description
`HedString`	`'HedString'`	self

Source code in hed/models/hed_string.py

def expand_defs(self) -> "HedString":
    """ Swap each expandable def tag for its def-expand group, in place.

        Only very minimal validation is performed.

    Returns:
        HedString: self
    """
    # Gather every (tag, expansion) pair first; the swaps are applied only afterwards.
    pending = [
        (def_tag, def_tag.expandable)
        for def_tag in self.find_def_tags(recursive=True, include_groups=0)
        if def_tag.expandable and not def_tag.expanded
    ]

    for def_tag, expansion in pending:
        parent = def_tag._parent
        parent.replace(def_tag, expansion)
        # The tag is re-parented to the expansion and relabelled as a def-expand tag.
        def_tag._parent = expansion
        def_tag.short_base_tag = DefTagNames.DEF_EXPAND_KEY

    return self

find_top_level_tags ¶

find_top_level_tags(anchor_tags, include_groups=2)

Find top level groups with an anchor tag.

A max of 1 tag located per top level group.

Parameters:

Name	Type	Description	Default
`anchor_tags`	`container`	A list/set/etc. of short_base_tags to find groups by.	required
`include_groups`	`(0, 1 or 2)`	Parameter indicating what return values to include. If 0: return only tags. If 1: return only groups. If 2 or any other value: return both.	`2`

Returns: list: The returned result depends on include_groups.

Source code in hed/models/hed_string.py

def find_top_level_tags(self, anchor_tags, include_groups=2):
    """ Locate top level groups that contain one of the anchor tags.

        At most one tag is reported per top level group.

    Parameters:
        anchor_tags (container):  A list/set/etc. of short_base_tags to find groups by.
        include_groups (0, 1 or 2):  Selects what is returned.
            If 0: return only tags.
            If 1: return only groups.
            If 2 or any other value: return (tag, group) pairs.
    Returns:
        list: Tags, groups, or (tag, group) tuples, depending on include_groups.
    """
    # Comparison is done on case-folded names.
    wanted = {name.casefold() for name in anchor_tags}

    matches = []
    for group in self.groups():
        # First matching tag of the group, if any; later matches in the same group are ignored.
        anchor = next((tag for tag in group.tags() if tag.short_base_tag.casefold() in wanted), None)
        if anchor is not None:
            matches.append((anchor, group))

    if include_groups in (0, 1):
        # 0 selects the tag and 1 the group of each pair.
        return [pair[include_groups] for pair in matches]
    return matches

from_hed_strings `classmethod` ¶

from_hed_strings(hed_strings)

Factory for creating HedStrings via combination.

Parameters:

Name	Type	Description	Default
`hed_strings`	`list or None`	A list of HedString objects to combine. This takes ownership of their children.	required

Returns:

Name	Type	Description
`new_string`	`HedString`	The newly combined HedString.

Source code in hed/models/hed_string.py

@classmethod
def from_hed_strings(cls, hed_strings):
    """ Build a single HedString by combining several existing ones.

    Parameters:
        hed_strings (list or None): A list of HedString objects to combine.
                                    The new string takes ownership of their children.

    Returns:
        HedString: The combined string. Its text is the comma-joined text of the inputs;
                   its schema and definitions come from the first input.

    :raises TypeError:
        - hed_strings is empty or None.
    """
    if not hed_strings:
        raise TypeError("Passed an empty list to from_hed_strings")

    combined = HedString.__new__(HedString)
    combined_text = ",".join(part._hed_string for part in hed_strings)

    # Flatten the children of every input, preserving their order.
    combined_children = []
    for part in hed_strings:
        for child in part.children:
            combined_children.append(child)

    lead = hed_strings[0]
    # Passing _contents hands over the existing children as-is.
    combined.__init__(hed_string=combined_text, _contents=combined_children,
                      hed_schema=lead._schema, def_dict=lead._def_dict)
    combined._from_strings = hed_strings
    return combined

get_as_original ¶

get_as_original() -> str

Return the original form of this string.

Returns:

Name	Type	Description
`str`	`str`	The string with all the tags in their original form.

Notes

Potentially with some extraneous spaces removed on returned string.

Source code in hed/models/hed_string.py

def get_as_original(self) -> str:
    """ Render this string using the original form of every tag.

    Returns:
        str: The string with all the tags in their original form.

    Notes:
        Some extraneous spaces may have been removed from the returned string.
    """
    original_form = self.get_as_form("org_tag")
    return original_form

remove_definitions ¶

remove_definitions()

Remove definition tags and groups from this string.

This does not validate definitions and will blindly remove invalid ones as well.

Source code in hed/models/hed_string.py

def remove_definitions(self):
    """ Strip definition tags and their groups out of this string.

        No validation is done, so invalid definitions are blindly removed as well.
    """
    groups_to_drop = self.find_top_level_tags({DefTagNames.DEFINITION_KEY}, include_groups=1)
    if not groups_to_drop:
        # Nothing found, so remove() is not called at all.
        return
    self.remove(groups_to_drop)

remove_refs ¶

remove_refs()

Remove any refs (tags contained entirely inside curly braces) from the string.

This does NOT validate the contents of the curly braces. This is only relevant when directly editing sidecar strings. Tools will naturally ignore these.

Source code in hed/models/hed_string.py

def remove_refs(self):
    """ Strip every ref (a tag contained entirely inside curly braces) from the string.

        The contents of the curly braces are NOT validated.  This only matters when directly
        editing sidecar strings; tools will naturally ignore these.
    """
    column_refs = []
    for candidate in self.get_all_tags():
        if candidate.is_column_ref():
            column_refs.append(candidate)

    if not column_refs:
        # Nothing found, so remove() is not called at all.
        return
    self.remove(column_refs)

shrink_defs ¶

shrink_defs() -> 'HedString'

Replace def-expand tags with def tags.

This does not validate them and will blindly shrink invalid ones as well.

Returns:

Type	Description
`'HedString'`	self

Source code in hed/models/hed_string.py

def shrink_defs(self) -> 'HedString':
    """ Collapse each def-expand group back into a plain def tag, in place.

        No validation is done, so invalid ones are blindly shrunk as well.

    Returns:
        HedString: self
    """
    expanded_pairs = self.find_tags({DefTagNames.DEF_EXPAND_KEY}, recursive=True)
    for tag, group in expanded_pairs:
        parent = group._parent
        if not parent:
            # A group without a (truthy) parent is left untouched.
            continue
        # The tag is relabelled, re-parented, and put where its group used to be.
        tag.short_base_tag = DefTagNames.DEF_KEY
        tag._parent = parent
        parent.replace(group, tag)

    return self

split_hed_string `staticmethod` ¶

split_hed_string(
    hed_string,
) -> list[tuple[bool, tuple[int, int]]]

Split a HED string into delimiters and tags.

Parameters:

Name	Type	Description	Default
`hed_string`	`str`	The HED string to split.	required

Returns:

Type	Description
`list[tuple[bool, tuple[int, int]]]`	list[tuple[bool, tuple[int, int]]]: A list of tuples where each tuple is (is_hed_tag, (start_pos, end_pos)).

Notes

The tuple format is as follows
- is_hed_tag (bool): A (possible) HED tag if True, delimiter if not.
- start_pos (int): Index of start of string in hed_string.
- end_pos (int): Index of end of string in hed_string.
This function does not validate tags or delimiters in any form.

Source code in hed/models/hed_string.py

@staticmethod
def split_hed_string(hed_string) -> list[tuple[bool, tuple[int, int]]]:
    """ Split a HED string into delimiters and tags.

    Parameters:
        hed_string (str): The HED string to split.

    Returns:
        list[tuple[bool, tuple[int, int]]]:  A list of tuples where each tuple is (is_hed_tag, (start_pos, end_pos)).

    Notes:
        - The tuple format is as follows
            - is_hed_tag (bool): A (possible) HED tag if True, delimiter if not.
            - start_pos (int):   Index of start of string in hed_string.
            - end_pos (int):     Index of end of string in hed_string.

        - This function does not validate tags or delimiters in any form.

    """
    tag_delimiters = ",()"
    current_spacing = 0
    found_symbol = True
    result_positions = []
    tag_start_pos = None
    last_end_pos = 0
    for i, char in enumerate(hed_string):
        if char == " ":
            current_spacing += 1
            continue

        if char in tag_delimiters:
            if found_symbol:
                if last_end_pos != i:
                    result_positions.append((False, (last_end_pos, i)))
                last_end_pos = i
            elif not found_symbol:
                found_symbol = True
                last_end_pos = i - current_spacing
                result_positions.append((True, (tag_start_pos, last_end_pos)))
                current_spacing = 0
                tag_start_pos = None
            continue

        # If we have a current delimiter, end it here.
        if found_symbol and last_end_pos is not None:
            if last_end_pos != i:
                result_positions.append((False, (last_end_pos, i)))
            last_end_pos = None

        found_symbol = False
        current_spacing = 0
        if tag_start_pos is None:
            tag_start_pos = i

    if last_end_pos is not None and len(hed_string) != last_end_pos:
        result_positions.append((False, (last_end_pos, len(hed_string))))
    if tag_start_pos is not None:
        result_positions.append((True, (tag_start_pos, len(hed_string) - current_spacing)))
        if current_spacing:
            result_positions.append((False, (len(hed_string) - current_spacing, len(hed_string))))

    return result_positions

split_into_groups `staticmethod` ¶

split_into_groups(hed_string, hed_schema, def_dict=None)

Split the HED string into a parse tree.

Parameters:

Name	Type	Description	Default
`hed_string`	`str`	A HED string consisting of tags and tag groups to be processed.	required
`hed_schema`	`HedSchema`	HED schema to use to identify tags.	required
`def_dict`	`DefinitionDict`	The definitions to identify.	`None`

Returns: list: A list of HedTag and/or HedGroup.

Raises: ValueError: The string is significantly malformed, such as mismatched parentheses.

Notes

The parse tree consists of tag groups, tags, and delimiters.

Source code in hed/models/hed_string.py

@staticmethod
def split_into_groups(hed_string, hed_schema, def_dict=None):
    """ Split the HED string into a parse tree.

    Parameters:
        hed_string (str): A HED string consisting of tags and tag groups to be processed.
        hed_schema (HedSchema): HED schema to use to identify tags.
        def_dict (DefinitionDict): The definitions to identify.
    Returns:
        list:  A list of HedTag and/or HedGroup.

    :raises ValueError:
        - The string is significantly malformed, such as mismatched parentheses.

    Notes:
        - The parse tree consists of tag groups, tags, and delimiters.
    """
    # Stack of containers being filled: the bottom entry is the top-level list,
    # every entry above it is a HedGroup whose closing parenthesis has not been seen yet.
    current_tag_group = [[]]

    input_tags = HedString.split_hed_string(hed_string)
    for is_hed_tag, (startpos, endpos) in input_tags:
        if is_hed_tag:
            new_tag = HedTag(hed_string, hed_schema, (startpos, endpos), def_dict)
            current_tag_group[-1].append(new_tag)
            continue

        string_portion = hed_string[startpos:endpos]
        # Index of the first non-whitespace character (0 if the portion is all whitespace).
        delimiter_index = next((i for i, char in enumerate(string_portion) if not char.isspace()), 0)
        delimiter_char = string_portion[delimiter_index]

        # Compare by value: identity ('is') on strings only works by interpreter accident.
        if delimiter_char == HedString.OPENING_GROUP_CHARACTER:
            current_tag_group.append(HedGroup(hed_string, startpos + delimiter_index))
        elif delimiter_char == HedString.CLOSING_GROUP_CHARACTER:
            # Terminate existing group, and save it off.
            paren_end = startpos + delimiter_index + 1

            if len(current_tag_group) <= 1:
                raise ValueError(f"Closing parentheses in HED string {hed_string}")

            new_group = current_tag_group.pop()
            new_group._endpos = paren_end
            current_tag_group[-1].append(new_group)

    # Comma delimiter issues are ignored and assumed already validated currently.
    if len(current_tag_group) != 1:
        raise ValueError(f"Unmatched opening parentheses in HED string {hed_string}")

    return current_tag_group[0]

validate ¶

validate(
    allow_placeholders=True, error_handler=None
) -> list[dict]

Validate the string using the schema.

Parameters:

Name	Type	Description	Default
`allow_placeholders`	`bool`	Allow placeholders in the string.	`True`
`error_handler`	`ErrorHandler or None`	The error handler to use, creates a default one if none passed.	`None`

Returns: list[dict]: A list of issues for HED string.

Source code in hed/models/hed_string.py

def validate(self, allow_placeholders=True, error_handler=None) -> list[dict]:
    """ Run schema validation on this string.

    Parameters:
        allow_placeholders (bool): Allow placeholders in the string.
        error_handler (ErrorHandler or None): The error handler to use, creates a default one if none passed.
    Returns:
        list[dict]: A list of issues for HED string.
    """
    # NOTE(review): imported at call time rather than at module level - presumably to avoid
    # a circular import between the models and the validator; confirm before moving it.
    from hed.validator import HedValidator

    return HedValidator(self._schema, def_dicts=self._def_dict).validate(
        self, allow_placeholders=allow_placeholders, error_handler=error_handler)

HedTag¶

HedTag ¶

A single HED tag.

Notes

HedTag is a smart class in that it keeps track of its original value and positioning as well as pointers to the relevant HED schema information, if relevant.

Source code in hed/models/hed_tag.py

class HedTag:
    """ A single HED tag.

    Notes:
        - HedTag is a smart class in that it keeps track of its original value and positioning
          as well as pointers to the relevant HED schema information, if relevant.

    """

    def __init__(self, hed_string, hed_schema, span=None, def_dict=None):
        """ Creates a HedTag.

        Parameters:
            hed_string (str): Source HED string for this tag.
            hed_schema (HedSchema): A parameter for calculating canonical forms on creation.
            span  (int, int): The start and end indexes of the tag in the hed_string.
                              When None, the tag spans the whole of hed_string.
            def_dict (DefinitionDict or None): The def dict to use to identify def/def expand tags.
        """
        self._hed_string = hed_string
        if span is None:
            span = (0, len(hed_string))
        # This is the span into the original HED string for this tag
        self.span = span

        # If this is present, use this as the org tag for most purposes.
        # This is not generally used anymore, but you can use it to replace a tag in place.
        self._tag = None

        # org_tag slices _hed_string by span, so both must already be set at this point.
        self._namespace = self._get_schema_namespace(self.org_tag)

        # This is the schema this tag was converted to.
        self._schema = None
        self._schema_entry = None

        self._extension_value = ""
        self._parent = None

        # Cached expansion state; filled in lazily by the expandable property.
        self._expandable = None
        self._expanded = False

        self.tag_terms = None  # tuple of all the terms in this tag Lowercase.
        # Sets _schema, _schema_entry and tag_terms, and _extension_value when there is a remainder.
        self._calculate_to_canonical_forms(hed_schema)

        # Only Def / Def-expand tags get a definition entry, and only when a def_dict was supplied.
        self._def_entry = None
        if def_dict:
            if self.short_base_tag in {DefTagNames.DEF_KEY, DefTagNames.DEF_EXPAND_KEY}:
                self._def_entry = def_dict.get_definition_entry(self)

    def copy(self) -> "HedTag":
        """ Return a deep copy of this tag.

        Returns:
            HedTag: The copied group.

        """
        save_parent = self._parent
        self._parent = None
        return_copy = copy.deepcopy(self)
        self._parent = save_parent
        return return_copy

    @property
    def schema_namespace(self) -> str:
        """ Library namespace for this tag if one exists.

        Returns:
            namespace (str): The library namespace, including the colon.

        """
        return self._namespace

    @property
    def short_tag(self) -> str:
        """ Short form including value or extension.

        Returns:
            str: The short form of the tag, including value or extension.

        """
        if self._schema_entry:
            return f"{self._namespace}{self._schema_entry.short_tag_name}{self._extension_value}"

        return str(self)

    @property
    def base_tag(self) -> str:
        """ Long form without value or extension.

        Returns:
            base_tag (str): The long form of the tag, without value or extension.
        """
        if self._schema_entry:
            return self._schema_entry.long_tag_name
        return str(self)

    @property
    def short_base_tag(self) -> str:
        """ Short form without value or extension.

        Returns:
            str: The short non-extension port of a tag.

        Notes:
            - ParentNodes/Def/DefName would return just "Def".

        """
        if self._schema_entry:
            return self._schema_entry.short_tag_name
        return str(self)

    @short_base_tag.setter
    def short_base_tag(self, new_tag_val):
        """ Change base tag, leaving extension or value.

        Parameters:
            new_tag_val (str): The new short_base_tag for this tag.

        :raises ValueError:
            - If the tag wasn't already identified.

        Note:
            - Generally this is used to swap def to def-expand.
        """
        if self._schema_entry:
            tag_entry = None
            if self._schema:
                if self.is_takes_value_tag():
                    new_tag_val = new_tag_val + "/#"
                tag_entry = self._schema.get_tag_entry(new_tag_val, schema_namespace=self.schema_namespace)

            self._schema_entry = tag_entry
        else:
            raise ValueError("Cannot set unidentified tags")

    @property
    def org_base_tag(self) -> str:
        """ Original form without value or extension.

        Returns:
            str: The original form of the tag, without value or extension.

        Notes:
            - Warning: This could be empty if the original tag had a name_prefix prepended.
              e.g. a column where "Label/" is prepended, thus the column value has zero base portion.
        """
        if self._schema_entry:
            extension_len = len(self._extension_value)
            if not extension_len:
                return self.tag

            org_len = len(self.tag)
            if org_len == extension_len:
                return ""

            return self.tag[:org_len - extension_len]
        return str(self)

    def tag_modified(self) -> bool:
        """ Return True if tag has been modified from original.

        Returns:
            bool: Return True if the tag is modified.

        Notes:
            - Modifications can include adding a column name_prefix.

        """
        return bool(self._tag)

    @property
    def tag(self) -> str:
        """ Returns the tag.

            Returns the original tag if no user form set.

        Returns:
            str: The custom set user form of the tag.

        """
        if self._tag:
            return self._tag

        return self.org_tag

    @tag.setter
    def tag(self, new_tag_val):
        """ Allow you to overwrite the tag output text.

        Parameters:
            new_tag_val (str): New (implicitly long form) of tag to set.

        Notes:
            - You probably don't actually want to call this.
        """
        self._tag = new_tag_val
        self._schema_entry = None
        self._calculate_to_canonical_forms(self._schema)

    @property
    def extension(self) -> str:
        """ Get the extension or value of tag.

            Generally this is just the portion after the last slash.
            Returns an empty string if no extension or value.

        Returns:
            str: The tag name.

        Notes:
            - This tag must have been computed first.

        """
        if self._extension_value:
            return self._extension_value[1:]

        return ""

    @extension.setter
    def extension(self, x):
        self._extension_value = f"/{x}"

    @property
    def long_tag(self) -> str:
        """ Long form including value or extension.

        Returns:
            str: The long form of this tag.

        """
        if self._schema_entry:
            return f"{self._namespace}{self._schema_entry.long_tag_name}{self._extension_value}"
        return str(self)

    @property
    def org_tag(self) -> str:
        """ Return the original unmodified tag.

        Returns:
            str: The original unmodified tag.

        """
        return self._hed_string[self.span[0]:self.span[1]]

    @property
    def expanded(self) -> bool:
        """Return if this is currently expanded or not.

           Will always be False unless expandable is set.  This is primarily used for Def/Def-expand tags at present.

        Returns:
            bool: True if this is currently expanded.
        """
        return self._expanded

    @property
    def expandable(self) -> Union["HedGroup", "HedTag", None]:
        """Return what this expands to.

           This is primarily used for Def/Def-expand tags at present.

           Lazily set the first time it's called.

        Returns:
            Union[HedGroup,HedTag,None]: Returns the expanded form of this tag.
        """
        if self._expandable is None and self._def_entry:
            save_parent = self._parent
            tag_label, _, placeholder = self.extension.partition('/')

            def_contents = self._def_entry.get_definition(self, placeholder_value=placeholder)
            self._parent = save_parent
            if def_contents is not None:
                self._expandable = def_contents
                self._expanded = self.short_base_tag == DefTagNames.DEF_EXPAND_KEY
        return self._expandable

    def is_column_ref(self) -> bool:
        """ Return if this tag is a column reference from a sidecar.

            You should only see these if you are directly accessing sidecar strings, tools should remove them otherwise.

        Returns:
            bool: Returns True if this is a column ref.
        """
        return self.org_tag.startswith('{') and self.org_tag.endswith('}')

    def __str__(self) -> str:
        """ Convert this HedTag to a string.

        Returns:
            str: The original tag if we haven't set a new tag.(e.g. short to long).

        """
        if self._schema_entry:
            return self.short_tag

        if self._tag:
            return self._tag

        return self._hed_string[self.span[0]:self.span[1]]

    def lower(self) -> str:
        """ Convenience function, equivalent to str(self).lower(). """
        return str(self).lower()

    def casefold(self) -> str:
        """ Convenience function, equivalent to str(self).casefold(). """
        return str(self).casefold()

    def _calculate_to_canonical_forms(self, hed_schema) -> list:
        """ Update internal state based on schema.

        Parameters:
            hed_schema (HedSchema or HedSchemaGroup): The schema to use to validate this tag.

        Returns:
            list:  A list of issues found during conversion. Each element is a dictionary.

        """
        tag_entry, remainder, tag_issues = hed_schema.find_tag_entry(self, self.schema_namespace)
        self._schema_entry = tag_entry
        self._schema = hed_schema
        if self._schema_entry:
            self.tag_terms = self._schema_entry.tag_terms
            if remainder:
                self._extension_value = remainder
        else:
            self.tag_terms = tuple()

        return tag_issues

    def get_stripped_unit_value(self, extension_text) -> tuple[Union[str, None], Union[str, None]]:
        """ Return the extension divided into value and units, if the units are valid.

        Parameters:
            extension_text (str): The text to split, in case it's a portion of a tag.

        Returns:
            str or None: The extension portion with the units removed or None if invalid units.
            str or None: The units or None if no units of the right unit class are found.

        Examples:
            'Duration/3 ms' will return ('3', 'ms')

        """
        tag_unit_classes = self.unit_classes
        stripped_value, units, match = HedTag._get_tag_units_portion(extension_text, tag_unit_classes)
        if stripped_value and match:
            return stripped_value, units
        elif units and not match:
            return None, units
        return extension_text, None

    def value_as_default_unit(self) -> Union[float, None]:
        """ Return the value converted to default units if possible or None if invalid.

        Returns:
            float or None: The extension value in default units.
                                   If no default units it assumes that the extension value is in default units.

        Examples:
            'Duration/300 ms' will return .3

        """
        tag_unit_classes = self.unit_classes
        stripped_value, unit, unit_entry = HedTag._get_tag_units_portion(self.extension, tag_unit_classes)
        if not stripped_value:
            return None
        if unit and not unit_entry:
            return None
        if unit and unit_entry and unit_entry.get_conversion_factor(unit) is not None:
            return float(stripped_value) * unit_entry.get_conversion_factor(unit)
        return float(stripped_value)

    @property
    def unit_classes(self) -> dict:
        """ Return a dict of all the unit classes this tag accepts.

        Returns:
            dict:  A dict of unit classes this tag accepts.

        Notes:
            - Returns empty dict if this is not a unit class tag.
            - The dictionary has unit name as the key and HedSchemaEntry as value.

        """
        if self._schema_entry:
            return self._schema_entry.unit_classes
        return {}

    @property
    def value_classes(self) -> dict:
        """ Return a dict of all the value classes this tag accepts.

        Returns:
            dict: A dictionary of HedSchemaEntry value classes this tag accepts.

        Notes:
            - Returns empty dict if this is not a value class.
            - The dictionary has unit name as the key and HedSchemaEntry as value.

        """
        if self._schema_entry:
            return self._schema_entry.value_classes
        return {}

    @property
    def attributes(self) -> dict:
        """ Return a dict of all the attributes this tag has.

            Returns empty dict if this is not a value tag.

        Returns:
            dict: A dict of attributes this tag has.

        Notes:
            - Returns empty dict if this is not a unit class tag.
            - The dictionary has unit name as the key and HedSchemaEntry as value.

        """
        if self._schema_entry:
            return self._schema_entry.attributes
        return {}

    def tag_exists_in_schema(self) -> bool:
        """ Return whether the schema entry for this tag exists.

        Returns:
            bool: True if this tag exists.

        Notes:
            - This does NOT assure this is a valid tag.
        """
        return bool(self._schema_entry)

    def is_takes_value_tag(self) -> bool:
        """ Return True if this is a takes value tag.

        Returns:
            bool: True if this is a takes value tag.

        """
        if self._schema_entry:
            return self._schema_entry.has_attribute(HedKey.TakesValue)
        return False

    def is_unit_class_tag(self) -> bool:
        """ Return True if this is a unit class tag.

        Returns:
            bool: True if this is a unit class tag.

        """
        if self._schema_entry:
            return bool(self._schema_entry.unit_classes)
        return False

    def is_value_class_tag(self) -> bool:
        """ Return True if this is a value class tag.

        Returns:
            bool:  True if this is a tag with a value class.

        """
        if self._schema_entry:
            return bool(self._schema_entry.value_classes)
        return False

    def is_basic_tag(self) -> bool:
        """  Return True if a known tag with no extension or value.

        Returns:
            bool:  True if this is a known tag without extension or value.

        """
        return bool(self._schema_entry and not self.extension)

    def has_attribute(self, attribute) -> bool:
        """ Return True if this is an attribute this tag has.

        Parameters:
            attribute (str): Name of the attribute.

        Returns:
            bool: True if this tag has the attribute.

        """
        if self._schema_entry:
            return self._schema_entry.has_attribute(attribute)
        return False

    def get_tag_unit_class_units(self) -> list:
        """ Get the unit class units associated with a particular tag.

        Returns:
            list: A list containing the unit class units associated with a particular tag or an empty list.

        """
        units = []
        unit_classes = self.unit_classes
        for unit_class_entry in unit_classes.values():
            units += unit_class_entry.units.keys()

        return units

    @property
    def default_unit(self):
        """ Get the default unit class unit for this tag.

            Only a tag with a single unit class can have default units.

        Returns:
            unit(UnitEntry or None): the default unit entry for this tag, or None
        """
        # todo: Make this cached
        unit_classes = self.unit_classes.values()
        if len(unit_classes) == 1:
            first_unit_class_entry = list(unit_classes)[0]
            default_unit = first_unit_class_entry.has_attribute(HedKey.DefaultUnits, return_value=True)
            return first_unit_class_entry.units.get(default_unit, None)

    def base_tag_has_attribute(self, tag_attribute) -> bool:
        """ Check to see if the tag has a specific attribute.

            This is primarily used to check for things like TopLevelTag on Definitions and similar.

        Parameters:
            tag_attribute (str): A tag attribute.

        Returns:
            bool: True if the tag has the specified attribute. False, if otherwise.

        """
        if not self._schema_entry:
            return False

        return self._schema_entry.base_tag_has_attribute(tag_attribute)

    @staticmethod
    def _get_schema_namespace(org_tag) -> str:
        """ Finds the library namespace for the tag.

        Parameters:
            org_tag (str): A string representing a tag.

        Returns:
            str: Library namespace string or empty.

        """
        first_slash = org_tag.find("/")
        first_colon = org_tag.find(":")

        if first_colon != -1:
            if first_slash != -1 and first_colon > first_slash:
                return ""

            return org_tag[:first_colon + 1]
        return ""

    @staticmethod
    def _get_tag_units_portion(extension_text, tag_unit_classes):
        """ Split a value portion into value, units and its valid unitEntry (if any).

        Parameters:
            extension_text (str): A string representing the value portion of a tag with unit classes.
            tag_unit_classes (dict): Dictionary of valid UnitClassEntry objects for this tag.

        Returns:
            stripped_value (str or None): The value with the units removed.
                                          This is filled in if there are no units as well.
            units (str or None); The units string or None if no units.
            unitEntry (UnitEntry or None): The matching unit entry if one is found

        Notes:
            value, None, None  -- value portion has no units.
            value, units, unitEntry -- value portion has value and valid units.
            value, units, None -- value portion has a value and invalid units.

        """
        value, _, units = extension_text.partition(" ")
        if not units:
            return value, None, None

        for unit_class_entry in tag_unit_classes.values():
            possible_match = unit_class_entry.get_derivative_unit_entry(units)
            if possible_match:
                return value, units, possible_match
        return value, units, None

    def is_placeholder(self) -> bool:
        """Returns if this tag has a placeholder in it.

        Returns:
            has_placeholder(bool): True if it has a placeholder
        """
        if "#" in self.org_tag or "#" in self._extension_value:
            return True
        return False

    def replace_placeholder(self, placeholder_value):
        """ If tag has a placeholder character(#), replace with value.

        Parameters:
            placeholder_value (str): Value to replace placeholder with.

        """
        if self.is_placeholder():
            if self._schema_entry:
                tag = self.tag.replace('#', placeholder_value)
                self._extension_value = self._extension_value.replace("#", placeholder_value)
                self.tag = tag
            else:
                self._tag = self.tag.replace("#", placeholder_value)

    def get_normalized_str(self):
        if self._schema_entry:
            return self._namespace + self._schema_entry.short_tag_name.casefold() + self._extension_value.casefold()
        else:
            return self.casefold()

    def __hash__(self):
        return hash(self.get_normalized_str())

    def __eq__(self, other):
        if self is other:
            return True

        if isinstance(other, str):
            return self.casefold() == other.casefold()

        if not isinstance(other, HedTag):
            return False

        if self.short_tag == other.short_tag:
            return True

        if self.org_tag.casefold() == other.org_tag.casefold():
            return True
        return False

    def __deepcopy__(self, memo):
        # Check if the object has already been copied.
        if id(self) in memo:
            return memo[id(self)]

        # create a new instance of HedTag class
        new_tag = self.__class__.__new__(self.__class__)
        new_tag.__dict__.update(self.__dict__)

        # add the new object to the memo dictionary
        memo[id(self)] = new_tag

        # Deep copy the attributes that need it(most notably, we don't copy schema/schema entry)
        new_tag._parent = copy.deepcopy(self._parent, memo)
        new_tag._expandable = copy.deepcopy(self._expandable, memo)
        new_tag._expanded = copy.deepcopy(self._expanded, memo)

        return new_tag

attributes `property` ¶

attributes: dict

Return a dict of all the attributes this tag has.

Returns empty dict if this is not a value tag.

Returns:

Name	Type	Description
`dict`	`dict`	A dict of attributes this tag has.

Notes

Returns empty dict if this tag has no schema entry.
The dictionary is the attributes mapping of the tag's schema entry.

base_tag `property` ¶

base_tag: str

Long form without value or extension.

Returns:

Name	Type	Description
`base_tag`	`str`	The long form of the tag, without value or extension.

default_unit `property` ¶

default_unit

Get the default unit class unit for this tag.

Only a tag with a single unit class can have default units.

Returns:

Name	Type	Description
`unit`	`UnitEntry or None`	the default unit entry for this tag, or None

expandable `property` ¶

expandable: Union['HedGroup', 'HedTag', None]

Return what this expands to.

This is primarily used for Def/Def-expand tags at present.

Lazily set the first time it's called.

Returns:

Type	Description
`Union['HedGroup', 'HedTag', None]`	Union[HedGroup,HedTag,None]: Returns the expanded form of this tag.

expanded `property` ¶

expanded: bool

Return if this is currently expanded or not.

Will always be False unless expandable is set. This is primarily used for Def/Def-expand tags at present.

Returns:

Name	Type	Description
`bool`	`bool`	True if this is currently expanded.

extension `property` `writable` ¶

extension: str

Get the extension or value of tag.

Generally this is just the portion after the last slash.
Returns an empty string if no extension or value.

Returns:

Name	Type	Description
`str`	`str`	The extension or value of the tag, or an empty string if there is none.

Notes

This tag must have been computed first.

long_tag `property` ¶

long_tag: str

Long form including value or extension.

Returns:

Name	Type	Description
`str`	`str`	The long form of this tag.

org_base_tag `property` ¶

org_base_tag: str

Original form without value or extension.

Returns:

Name	Type	Description
`str`	`str`	The original form of the tag, without value or extension.

Notes

Warning: This could be empty if the original tag had a name_prefix prepended. e.g. a column where "Label/" is prepended, thus the column value has zero base portion.

org_tag `property` ¶

org_tag: str

Return the original unmodified tag.

Returns:

Name	Type	Description
`str`	`str`	The original unmodified tag.

schema_namespace `property` ¶

schema_namespace: str

Library namespace for this tag if one exists.

Returns:

Name	Type	Description
`namespace`	`str`	The library namespace, including the colon.

short_base_tag `property` `writable` ¶

short_base_tag: str

Short form without value or extension.

Returns:

Name	Type	Description
`str`	`str`	The short non-extension portion of a tag.

Notes

ParentNodes/Def/DefName would return just "Def".

short_tag `property` ¶

short_tag: str

Short form including value or extension.

Returns:

Name	Type	Description
`str`	`str`	The short form of the tag, including value or extension.

tag `property` `writable` ¶

tag: str

Returns the tag.

Returns the original tag if no user form set.

Returns:

Name	Type	Description
`str`	`str`	The custom set user form of the tag.

unit_classes `property` ¶

unit_classes: dict

Return a dict of all the unit classes this tag accepts.

Returns:

Name	Type	Description
`dict`	`dict`	A dict of unit classes this tag accepts.

Notes

Returns empty dict if this is not a unit class tag.
The dictionary has unit name as the key and HedSchemaEntry as value.

value_classes `property` ¶

value_classes: dict

Return a dict of all the value classes this tag accepts.

Returns:

Name	Type	Description
`dict`	`dict`	A dictionary of HedSchemaEntry value classes this tag accepts.

Notes

Returns empty dict if this is not a value class tag.
The dictionary has unit name as the key and HedSchemaEntry as value.

base_tag_has_attribute ¶

base_tag_has_attribute(tag_attribute) -> bool

Check to see if the tag has a specific attribute.

This is primarily used to check for things like TopLevelTag on Definitions and similar.

Parameters:

Name	Type	Description	Default
`tag_attribute`	`str`	A tag attribute.	required

Returns:

Name	Type	Description
`bool`	`bool`	True if the tag has the specified attribute. False, if otherwise.

Source code in hed/models/hed_tag.py

def base_tag_has_attribute(self, tag_attribute) -> bool:
    """ Ask the schema entry whether the base tag has the given attribute.

        This is primarily used to check for things like TopLevelTag on Definitions and similar.

    Parameters:
        tag_attribute (str): A tag attribute.

    Returns:
        bool: True if the tag has the specified attribute. False, if otherwise.

    """
    entry = self._schema_entry
    if entry:
        return entry.base_tag_has_attribute(tag_attribute)
    return False

casefold ¶

casefold() -> str

Convenience function, equivalent to str(self).casefold().

Source code in hed/models/hed_tag.py

def casefold(self) -> str:
    """ Return the casefolded string form of this tag, i.e. str(self).casefold(). """
    as_text = str(self)
    return as_text.casefold()

copy ¶

copy() -> 'HedTag'

Return a deep copy of this tag.

Returns:

Name	Type	Description
`HedTag`	`'HedTag'`	The copied tag.

Source code in hed/models/hed_tag.py

def copy(self) -> "HedTag":
    """ Return a deep copy of this tag, detached from any parent.

    Returns:
        HedTag: The copied tag. Its parent is None.

    Notes:
        - The parent link is cleared while copying and is restored even if copying fails.

    """
    save_parent = self._parent
    self._parent = None
    try:
        return copy.deepcopy(self)
    finally:
        # Always put the parent back so a failed copy cannot orphan this tag.
        self._parent = save_parent

get_stripped_unit_value ¶

get_stripped_unit_value(
    extension_text,
) -> tuple[Union[str, None], Union[str, None]]

Return the extension divided into value and units, if the units are valid.

Parameters:

Name	Type	Description	Default
`extension_text`	`str`	The text to split, in case it's a portion of a tag.	required

Returns:

Type	Description
`Union[str, None]`	str or None: The extension portion with the units removed or None if invalid units.
`Union[str, None]`	str or None: The units or None if no units of the right unit class are found.

Examples:

'Duration/3 ms' will return ('3', 'ms')

Source code in hed/models/hed_tag.py

def get_stripped_unit_value(self, extension_text) -> tuple[Union[str, None], Union[str, None]]:
    """ Split extension text into its value and its units, when the units are valid for this tag.

    Parameters:
        extension_text (str): The text to split, in case it's a portion of a tag.

    Returns:
        str or None: The extension portion with the units removed or None if invalid units.
        str or None: The units or None if no units of the right unit class are found.

    Examples:
        'Duration/3 ms' will return ('3', 'ms')

    """
    value, units, unit_entry = HedTag._get_tag_units_portion(extension_text, self.unit_classes)
    if value and unit_entry:
        return value, units
    if units and not unit_entry:
        # Units were given but none of this tag's unit classes recognise them.
        return None, units
    return extension_text, None

get_tag_unit_class_units ¶

get_tag_unit_class_units() -> list

Get the unit class units associated with a particular tag.

Returns:

Name	Type	Description
`list`	`list`	A list containing the unit class units associated with a particular tag or an empty list.

Source code in hed/models/hed_tag.py

def get_tag_unit_class_units(self) -> list:
    """ Collect the unit names from every unit class this tag accepts.

    Returns:
        list: A list containing the unit class units associated with a particular tag or an empty list.

    """
    return [
        unit_name
        for unit_class_entry in self.unit_classes.values()
        for unit_name in unit_class_entry.units.keys()
    ]

has_attribute ¶

has_attribute(attribute) -> bool

Return True if this is an attribute this tag has.

Parameters:

Name	Type	Description	Default
`attribute`	`str`	Name of the attribute.	required

Returns:

Name	Type	Description
`bool`	`bool`	True if this tag has the attribute.

Source code in hed/models/hed_tag.py

def has_attribute(self, attribute) -> bool:
    """ Ask the schema entry of this tag whether it has the given attribute.

    Parameters:
        attribute (str): Name of the attribute.

    Returns:
        bool: True if this tag has the attribute. False when the tag has no schema entry.

    """
    entry = self._schema_entry
    return entry.has_attribute(attribute) if entry else False

is_basic_tag ¶

is_basic_tag() -> bool

Return True if a known tag with no extension or value.

Returns:

Name	Type	Description
`bool`	`bool`	True if this is a known tag without extension or value.

Source code in hed/models/hed_tag.py

def is_basic_tag(self) -> bool:
    """  Report whether this is a schema tag that carries no extension or value.

    Returns:
        bool:  True if this is a known tag without extension or value.

    """
    if not self._schema_entry:
        return False
    return not self.extension

is_column_ref ¶

is_column_ref() -> bool

Return if this tag is a column reference from a sidecar.

You should only see these if you are directly accessing sidecar strings, tools should remove them otherwise.

Returns:

Name	Type	Description
`bool`	`bool`	Returns True if this is a column ref.

Source code in hed/models/hed_tag.py

def is_column_ref(self) -> bool:
    """ Report whether this tag is a sidecar column reference, i.e. wrapped in curly braces.

        You should only see these if you are directly accessing sidecar strings, tools should remove them otherwise.

    Returns:
        bool: Returns True if this is a column ref.
    """
    original = self.org_tag
    return original.startswith('{') and original.endswith('}')

is_placeholder ¶

is_placeholder() -> bool

Returns if this tag has a placeholder in it.

Returns:

Name	Type	Description
`has_placeholder`	`bool`	True if it has a placeholder

Source code in hed/models/hed_tag.py

def is_placeholder(self) -> bool:
    """Report whether the original tag text or the extension contains a placeholder (#).

    Returns:
        has_placeholder(bool): True if it has a placeholder
    """
    return "#" in self.org_tag or "#" in self._extension_value

is_takes_value_tag ¶

is_takes_value_tag() -> bool

Return True if this is a takes value tag.

Returns:

Name	Type	Description
`bool`	`bool`	True if this is a takes value tag.

Source code in hed/models/hed_tag.py

def is_takes_value_tag(self) -> bool:
    """ Report whether the schema entry of this tag has the takes-value attribute.

    Returns:
        bool: True if this is a takes value tag. False when the tag has no schema entry.

    """
    entry = self._schema_entry
    if not entry:
        return False
    return entry.has_attribute(HedKey.TakesValue)

is_unit_class_tag ¶

is_unit_class_tag() -> bool

Return True if this is a unit class tag.

Returns:

Name	Type	Description
`bool`	`bool`	True if this is a unit class tag.

Source code in hed/models/hed_tag.py

def is_unit_class_tag(self) -> bool:
    """ Report whether the schema entry of this tag has any unit classes.

    Returns:
        bool: True if this is a unit class tag. False when the tag has no schema entry.

    """
    entry = self._schema_entry
    if not entry:
        return False
    return bool(entry.unit_classes)

is_value_class_tag ¶

is_value_class_tag() -> bool

Return True if this is a value class tag.

Returns:

Name	Type	Description
`bool`	`bool`	True if this is a tag with a value class.

Source code in hed/models/hed_tag.py

def is_value_class_tag(self) -> bool:
    """ Return True if this is a value class tag.

    Returns:
        bool: True if the tag has a schema entry with a non-empty value_classes;
              False when there is no schema entry.

    """
    entry = self._schema_entry
    if not entry:
        return False
    return bool(entry.value_classes)

lower ¶

lower() -> str

Convenience function, equivalent to str(self).lower().

Source code in hed/models/hed_tag.py

def lower(self) -> str:
    """ Return the lowercase form of this tag's string representation.

    Convenience function, equivalent to str(self).lower().

    Returns:
        str: str(self) converted to lowercase.
    """
    return str(self).lower()

replace_placeholder ¶

replace_placeholder(placeholder_value)

If tag has a placeholder character(#), replace with value.

Parameters:

Name	Type	Description	Default
`placeholder_value`	`str`	Value to replace placeholder with.	required

Source code in hed/models/hed_tag.py

def replace_placeholder(self, placeholder_value):
    """ If tag has a placeholder character(#), replace with value.

    Does nothing when is_placeholder() is False.

    Parameters:
        placeholder_value (str): Value to replace placeholder with.

    """
    if not self.is_placeholder():
        return

    if not self._schema_entry:
        # No schema entry: only the stored tag text is rewritten.
        self._tag = self.tag.replace("#", placeholder_value)
        return

    # The new tag text is computed before the extension value is rewritten,
    # and assigned to self.tag only afterwards.
    replaced_tag = self.tag.replace('#', placeholder_value)
    self._extension_value = self._extension_value.replace("#", placeholder_value)
    self.tag = replaced_tag

tag_exists_in_schema ¶

tag_exists_in_schema() -> bool

Return whether the schema entry for this tag exists.

Returns:

Name	Type	Description
`bool`	`bool`	True if this tag exists.

Notes

This does NOT assure this is a valid tag.

Source code in hed/models/hed_tag.py

def tag_exists_in_schema(self) -> bool:
    """ Return whether the schema entry for this tag exists.

    Returns:
        bool: True if this tag has a truthy schema entry.

    Notes:
        - This does NOT assure this is a valid tag.
    """
    return bool(self._schema_entry)

tag_modified ¶

tag_modified() -> bool

Return True if tag has been modified from original.

Returns:

Name	Type	Description
`bool`	`bool`	Return True if the tag is modified.

Notes

Modifications can include adding a column name_prefix.

Source code in hed/models/hed_tag.py

def tag_modified(self) -> bool:
    """ Return True if tag has been modified from original.

    Returns:
        bool: True if a modified tag value is stored (self._tag is truthy).

    Notes:
        - Modifications can include adding a column name_prefix.

    """
    return bool(self._tag)

value_as_default_unit ¶

value_as_default_unit() -> Union[float, None]

Return the value converted to default units if possible or None if invalid.

Returns:

Type	Description
`Union[float, None]`	float or None: The extension value in default units. If no default units it assumes that the extension value is in default units.

Examples:

'Duration/300 ms' will return .3

Source code in hed/models/hed_tag.py

def value_as_default_unit(self) -> Union[float, None]:
    """ Return the value converted to default units if possible or None if invalid.

    Returns:
        float or None: The extension value in default units.
                       None is returned when no value portion is found, or when a unit
                       is present but has no matching unit entry.
                       If no conversion factor is available, the extension value is
                       assumed to already be in default units.

    Examples:
        'Duration/300 ms' will return .3

    """
    stripped_value, unit, unit_entry = HedTag._get_tag_units_portion(self.extension, self.unit_classes)
    if not stripped_value:
        return None

    if unit:
        if not unit_entry:
            return None
        factor = unit_entry.get_conversion_factor(unit)
        if factor is not None:
            return float(stripped_value) * factor

    # NOTE(review): float() raises ValueError for a non-numeric string; confirm that
    # _get_tag_units_portion only returns numeric value portions.
    return float(stripped_value)

HedGroup¶

HedGroup ¶

A single parenthesized HED string.

Source code in hed/models/hed_group.py

class HedGroup:
    """ A single parenthesized HED string. """

    def __init__(self, hed_string="", startpos=None, endpos=None, contents=None):
        """ Return an empty HedGroup object.

        Parameters:
            hed_string (str or None): Source HED string for this group.
            startpos (int or None):   Starting index of group(including parentheses) in hed_string.
            endpos (int or None):     Position after the end (including parentheses) in hed_string.
            contents (list or None):  A list of HedTags and/or HedGroups that will be set as the contents of this group.
                                      Mostly used during definition expansion.
        """
        self._startpos = startpos
        self._endpos = endpos
        self._hed_string = hed_string
        self._parent = None

        if contents:
            self.children = contents
            for child in self.children:
                child._parent = self
        else:
            self.children = []
        self._original_children = self.children

    def append(self, tag_or_group):
        """ Add a tag or group to the end of this group's children.

        The added object's parent is set to this group before it is appended.

        Parameters:
            tag_or_group (HedTag or HedGroup): The new object to add to this group.
        """
        tag_or_group._parent = self
        self.children.append(tag_or_group)

    def check_if_in_original(self, tag_or_group) -> bool:
        """ Check if the tag or group in original string.

        Parameters:
            tag_or_group (HedTag or HedGroup): The HedTag or HedGroup to be looked for in this group.

        Returns:
            bool:  True if in this group.
        """
        node_list = [self]
        final_list = []

        # Using an iterator is worse performance wise here.
        while node_list:
            current_group_or_tag = node_list.pop(0)
            if isinstance(current_group_or_tag, HedGroup):
                node_list = current_group_or_tag._original_children + node_list
            final_list.append(current_group_or_tag)

        return self._check_in_group(tag_or_group, final_list)

    @staticmethod
    def replace(item_to_replace, new_contents):
        """ Replace an existing tag or group.

            Note: This is a static method that relies on the parent attribute of item_to_replace.

        Parameters:
            item_to_replace (HedTag or HedGroup): The item to replace must exist or this will raise an error.
            new_contents (HedTag or HedGroup): Replacement contents.

        :raises KeyError:
            - item_to_replace does not exist.

        :raises AttributeError:
            - item_to_replace has no parent set.
        """
        parent = item_to_replace._parent
        parent._replace(item_to_replace=item_to_replace, new_contents=new_contents)

    def _replace(self, item_to_replace, new_contents):
        """ Replace an existing tag or group.

        Parameters:
            item_to_replace (HedTag or HedGroup): The item to replace must exist and be a direct child,
                                                  or this will raise an error.
            new_contents (HedTag or HedGroup): Replacement contents.

        :raises KeyError:
            - item_to_replace does not exist.
        """
        if self._original_children is self.children:
            self._original_children = self.children.copy()

        for i, child in enumerate(self.children):
            if item_to_replace is child:
                self.children[i] = new_contents
                new_contents._parent = self
                return

        raise KeyError(f"The tag {item_to_replace} not found in the group.")

    def remove(self, items_to_remove: Iterable[Union[HedTag, 'HedGroup']]):
        """ Remove any tags/groups in items_to_remove.

        Parameters:
            items_to_remove (list):  List of HedGroups and/or HedTags to remove by identity.

        Notes:
            - Any groups that become empty will also be pruned.
            - If you pass a child and parent group, the child will also be removed from the parent.
        """
        empty_groups = []
        # Filter out duplicates
        items_to_remove = {id(item): item for item in items_to_remove}.values()

        for item in items_to_remove:
            group = item._parent
            if group._original_children is group.children:
                group._original_children = group.children.copy()

            group.children.remove(item)
            if not group.children and group is not self:
                empty_groups.append(group)

        if empty_groups:
            self.remove(empty_groups)

        # Do this last to avoid confusing typing
        for item in items_to_remove:
            item._parent = None

    def __copy__(self):
        """ Disallow shallow copies; always raises ValueError. """
        raise ValueError("Cannot make shallow copies of HedGroups")

    def copy(self) -> "HedGroup":
        """ Return a deep copy of this group.

        Returns:
            HedGroup: The copied group.

        """
        save_parent = self._parent
        self._parent = None
        return_copy = copy.deepcopy(self)
        self._parent = save_parent
        return return_copy

    def sort(self):
        """ Sort the tags and groups in this group, in place, in a consistent order.

        Delegates to _sorted with update_self=True, which also reorders nested groups.
        """
        self._sorted(update_self=True)

    def sorted(self) -> "HedGroup":
        """ Return a sorted deep copy of this HED group, leaving this group untouched.

        Returns:
            sorted_copy (HedGroup): The sorted copy.
        """
        sorted_copy = self.copy()
        sorted_copy._sorted(update_self=True)
        return sorted_copy

    def _sorted(self, update_self=False) -> list:
        """ Return the sorted contents of this HED group as a (possibly nested) list.

        Tags come first, followed by groups; each section is ordered by its string form.

        Parameters:
            update_self (bool): If True, update the contents of this group to be sorted as well.

        Returns:
            list: The list of all tags in this group, with subgroups being returned as further nested lists.
        """
        def by_text(pair):
            return str(pair[0])

        # Each pair is (child, value to return for that child).
        tag_pairs = []
        group_pairs = []
        for child in list(self.children):
            if isinstance(child, HedTag):
                tag_pairs.append((child, child))
            else:
                group_pairs.append((child, child._sorted(update_self)))

        tag_pairs.sort(key=by_text)
        group_pairs.sort(key=by_text)
        ordered = tag_pairs + group_pairs
        if update_self:
            self.children = [pair[0] for pair in ordered]
        return [pair[1] for pair in ordered]

    @property
    def is_group(self):
        """ True if this is a parenthesized group.

        Always True here; __str__ and get_as_form use it to decide whether to add parentheses.
        """
        return True

    def get_all_tags(self) -> list:
        """ Return HedTags, including descendants.

        Returns:
            list:  A list of all the tags in this group including descendants.

        """
        node_list = [self]
        final_list = []

        # Using an iterator is worse performance wise here.
        while node_list:
            current_group_or_tag = node_list.pop(0)
            if isinstance(current_group_or_tag, HedGroup):
                node_list = list(current_group_or_tag.children) + node_list
            else:
                final_list.append(current_group_or_tag)
        return final_list

    def get_all_groups(self, also_return_depth=False) -> list:
        """ Return HedGroups, including descendants and self.

        Parameters:
            also_return_depth (bool): If True, yield tuples (group, depth) rather than just groups.

        Returns:
            list: The list of all HedGroups in this group, including descendants and self.

        """
        node_list = [self]
        final_list = []

        # Using an iterator is worse performance wise here.
        while node_list:
            current_group_or_tag = node_list.pop(0)
            if isinstance(current_group_or_tag, HedGroup):
                node_list = list(current_group_or_tag.children) + node_list
                final_list.append(current_group_or_tag)

        if also_return_depth:
            top_groups = self.groups()

            final_list = [(group, self._check_in_group(group, top_groups)) for group in final_list]
        return final_list

    @staticmethod
    def _check_in_group(group, group_list) -> bool:
        """ Return True if the group is list.

        Parameters:
            group (HedGroup): The group to check for.
            group_list (list):    A list of groups to search.

        Returns:
            bool: True if group is in the group list.

        """
        for val in group_list:
            if val is group:
                return True
        return False

    def tags(self) -> list:
        """ Return the direct child tags of this group.

        Returns:
            list: All tags directly in this group, filtering out HedGroup children.

        """
        return [tag for tag in self.children if isinstance(tag, HedTag)]

    def groups(self) -> list:
        """ Return the direct child groups of this group.

        Returns:
            list: All groups directly in this group, filtering out HedTag children.

        """
        return [group for group in self.children if isinstance(group, HedGroup)]

    def get_first_group(self) -> HedGroup:
        """ Return the first direct child group of this HED string or group.

            Useful for things like Def-expand where they only have a single group.

        Returns:
            HedGroup: The first group.

        :raises IndexError:
            - There are no direct child groups (the list returned by groups() is empty).

        """
        return self.groups()[0]

    def get_original_hed_string(self) -> str:
        """ Get the original HED string.

        Returns:
            str: The slice of the source string between the start and end positions of
                 this group, with no modification.

        """
        return self._hed_string[self._startpos:self._endpos]

    @property
    def span(self):
        """ Return the source span.

        Returns:
            tuple: (start, end) where start is the start index of the group (including parentheses)
                   in the source string and end is the end index of the group (including parentheses).
                   Either value is None if it was not supplied at construction.

        """
        return self._startpos, self._endpos

    def __str__(self) -> str:
        """ Convert this HedGroup to a string.

        Returns:
            str: The group as a string, including any modified HedTags.

        """
        if self.is_group:
            return "(" + ",".join([str(child) for child in self.children]) + ")"
        return ",".join([str(child) for child in self.children])

    def get_as_short(self) -> str:
        """ Return this HedGroup as a short tag string.

        Equivalent to get_as_form("short_tag").

        Returns:
            str: The group as a string with all tags as short tags.

        """
        return self.get_as_form("short_tag")

    def get_as_long(self) -> str:
        """ Return this HedGroup as a long tag string.

        Equivalent to get_as_form("long_tag").

        Returns:
            str: The group as a string with all tags as long tags.

        """
        return self.get_as_form("long_tag")

    def get_as_form(self, tag_attribute) -> str:
        """ Get the string corresponding to the specified form.

        Parameters:
            tag_attribute (str): The hed_tag property to use to construct the string (usually short_tag or long_tag).

        Returns:
            str: The constructed string after transformation.
        """
        result = ",".join([child.__getattribute__(tag_attribute) if isinstance(child, HedTag) else
                           child.get_as_form(tag_attribute) for child in self.children])
        if self.is_group:
            return f"({result})"
        return result

    def lower(self) -> str:
        """ Convenience function, equivalent to str(self).lower().

        Returns:
            str: The string form of this group converted to lowercase.
        """
        return str(self).lower()

    def casefold(self) -> str:
        """ Convenience function, equivalent to str(self).casefold().

        Returns:
            str: The casefolded string form of this group.
        """
        return str(self).casefold()

    def get_as_indented(self, tag_attribute="short_tag"):
        """Return the string as a multiline indented format.

        Parameters:
            tag_attribute (str): The hed_tag property to use to construct the string (usually short_tag or long_tag).

        Returns:
            formatted_hed (str): The indented string.
        """
        hed_string = self.sorted().get_as_form(tag_attribute)

        level_open = []
        level = 0
        indented = ""
        prev = ''
        for c in hed_string:
            if c == "(":
                level_open.append(level)
                indented += "\n" + "\t" * level + c
                level += 1
            elif c == ")":
                level = level_open.pop()
                if prev == ")":
                    indented += "\n" + "\t" * level + c
                else:
                    indented += c

            else:
                indented += c
            prev = c

        return indented

    def find_placeholder_tag(self) -> Union[HedTag, None]:
        """ Return a placeholder tag, if present in this group.

        Returns:
            Union[HedTag, None]: The placeholder tag if found.

        Notes:
            - Assumes a valid HedString with no erroneous "#" characters.
        """
        for tag in self.get_all_tags():
            if tag.is_placeholder():
                return tag

        return None

    def __bool__(self) -> bool:
        """ Return True if this group has at least one child. """
        return bool(self.children)

    def __eq__(self, other):
        """ Test whether other is equal to this object.

            Note: This does not account for sorting.  Objects must be in the same order to match.
        """
        if self is other:
            return True

        # Allow us to compare to a list of groups.
        # Note this comparison will NOT check if the list has the outer parenthesis
        if isinstance(other, list):
            return self.children == other
        if isinstance(other, str):
            return str(self) == other
        if not isinstance(other, HedGroup) or self.children != other.children or self.is_group != other.is_group:
            return False
        return True

    def find_tags(self, search_tags, recursive=False, include_groups=2) -> list:
        """ Find the base tags and their containing groups.
        This searches by short_base_tag, ignoring any ancestors or extensions/values.

        Parameters:
            search_tags (container):  A container of short_base_tags to locate.
            recursive (bool): If true, also check subgroups.
            include_groups (0, 1 or 2): Specify return values.
                If 0: return a list of the HedTags.
                If 1: return a list of the HedGroups containing the HedTags.
                If 2: return a list of tuples (HedTag, HedGroup) for the found tags.

        Returns:
            list: The contents of the list depends on the value of include_groups.
        """
        found_tags = []
        if recursive:
            tags = self.get_all_tags()
        else:
            tags = self.tags()
        search_tags = {tag.casefold() for tag in search_tags}
        for tag in tags:
            if tag.short_base_tag.casefold() in search_tags:
                found_tags.append((tag, tag._parent))

        if include_groups == 0 or include_groups == 1:
            return [tag[include_groups] for tag in found_tags]
        return found_tags

    def find_wildcard_tags(self, search_tags, recursive=False, include_groups=2) -> list:
        """ Find the tags and their containing groups.

            This searches tag.short_tag.casefold(), with an implicit wildcard on the end.

            e.g. "Eve" will find Event, but not Sensory-event.

        Parameters:
            search_tags (container): A container of the starts of short tags to search.
            recursive (bool): If True, also check subgroups.
            include_groups (0, 1 or 2): Specify return values.
                If 0: return a list of the HedTags.
                If 1: return a list of the HedGroups containing the HedTags.
                If 2: return a list of tuples (HedTag, HedGroup) for the found tags.

        Returns:
            list: The contents of the list depends on the value of include_groups.
        """
        found_tags = []
        if recursive:
            tags = self.get_all_tags()
        else:
            tags = self.tags()

        search_tags = {search_tag.casefold() for search_tag in search_tags}

        for tag in tags:
            for search_tag in search_tags:
                if tag.short_tag.casefold().startswith(search_tag):
                    found_tags.append((tag, tag._parent))
                    # We can't find the same tag twice
                    break

        if include_groups == 0 or include_groups == 1:
            return [tag[include_groups] for tag in found_tags]
        return found_tags

    def find_exact_tags(self, exact_tags, recursive=False, include_groups=1) -> list:
        """  Find the given tags.  This will only find complete matches, any extension or value must also match.

        Parameters:
            exact_tags (list of HedTag): A container of tags to locate.
            recursive (bool): If true, also check subgroups.
            include_groups (bool): 0, 1 or 2.
                If 0: Return only tags
                If 1: Return only groups
                If 2 or any other value: Return both
        Returns:
            list: A list of tuples. The contents depend on the values of the include_group.
        """
        found_tags = []
        if recursive:
            tags = self.get_all_tags()
        else:
            tags = self.tags()

        for tag in tags:
            if tag in exact_tags:
                found_tags.append((tag, tag._parent))

        if include_groups == 0 or include_groups == 1:
            return [tag[include_groups] for tag in found_tags]
        return found_tags

    def find_def_tags(self, recursive=False, include_groups=3) -> list:
        """ Find def and def-expand tags.

        Parameters:
            recursive (bool): If true, also check subgroups.
            include_groups (int, 0, 1, 2, 3): Options for return values.
                If 0: Return only def and def expand tags/.
                If 1: Return only def tags and def-expand groups.
                If 2: Return only groups containing defs, or def-expand groups.
                If 3 or any other value: Return all 3 as a tuple.
        Returns:
            list: A list of tuples. The contents depend on the values of the include_group.
        """
        if recursive:
            groups = self.get_all_groups()
            def_tags = []
            for group in groups:
                def_tags += self._get_def_tags_from_group(group)
        else:
            def_tags = self._get_def_tags_from_group(self)

        if include_groups == 0 or include_groups == 1 or include_groups == 2:
            return [tag[include_groups] for tag in def_tags]
        return def_tags

    @staticmethod
    def _get_def_tags_from_group(group):
        def_tags = []
        for child in group.children:
            if isinstance(child, HedTag):
                if child.short_base_tag == DefTagNames.DEF_KEY:
                    def_tags.append((child, child, group))
            else:
                for tag in child.tags():
                    if tag.short_base_tag == DefTagNames.DEF_EXPAND_KEY:
                        def_tags.append((tag, child, group))
        return def_tags

    def find_tags_with_term(self, term, recursive=False, include_groups=2) -> list:
        """  Find any tags that contain the given term.

            Note: This can only find identified tags.

        Parameters:
            term (str): A single term to search for.
            recursive (bool): If true, recursively check subgroups.
            include_groups (0, 1 or 2): Controls return values
                If 0: Return only tags.
                If 1: Return only groups.
                If 2 or any other value: Return both.

        Returns:
            list:
        """
        found_tags = []
        if recursive:
            tags = self.get_all_tags()
        else:
            tags = self.tags()

        search_for = term.casefold()
        for tag in tags:
            if search_for in tag.tag_terms:
                found_tags.append((tag, tag._parent))

        if include_groups == 0 or include_groups == 1:
            return [tag[include_groups] for tag in found_tags]
        return found_tags

is_group `property` ¶

is_group

True if this is a parenthesized group.

span `property` ¶

span

Return the source span.

Return

int: start index of the group (including parentheses) from the source string. int: end index of the group (including parentheses) from the source string.

append ¶

append(tag_or_group)

Add a tag or group to this group.

Parameters:

Name	Type	Description	Default
`tag_or_group`	`HedTag or HedGroup`	The new object to add to this group.	required

Source code in hed/models/hed_group.py

def append(self, tag_or_group):
    """ Add a tag or group to the end of this group's children.

    The added object's parent is set to this group before it is appended.

    Parameters:
        tag_or_group (HedTag or HedGroup): The new object to add to this group.
    """
    tag_or_group._parent = self
    self.children.append(tag_or_group)

casefold ¶

casefold()

Convenience function, equivalent to str(self).casefold().

Source code in hed/models/hed_group.py

def casefold(self) -> str:
    """ Convenience function, equivalent to str(self).casefold().

    Returns:
        str: The casefolded string form of this group.
    """
    return str(self).casefold()

check_if_in_original ¶

check_if_in_original(tag_or_group) -> bool

Check whether the tag or group is in the original string.

Parameters:

Name	Type	Description	Default
`tag_or_group`	`HedTag or HedGroup`	The HedTag or HedGroup to be looked for in this group.	required

Returns:

Name	Type	Description
`bool`	`bool`	True if in this group.

Source code in hed/models/hed_group.py

def check_if_in_original(self, tag_or_group) -> bool:
    """ Check whether the tag or group is in the original contents of this group.

    The search walks the original children lists (not the current children) of this
    group and of every nested group, and compares by identity.  This group itself is
    included in the search.

    Parameters:
        tag_or_group (HedTag or HedGroup): The HedTag or HedGroup to be looked for in this group.

    Returns:
        bool:  True if in this group.
    """
    pending = [self]
    visited = []

    # Using an iterator is worse performance wise here.
    while pending:
        node = pending.pop(0)
        if isinstance(node, HedGroup):
            pending = node._original_children + pending
        visited.append(node)

    return self._check_in_group(tag_or_group, visited)

copy ¶

copy() -> 'HedGroup'

Return a deep copy of this group.

Returns:

Name	Type	Description
`HedGroup`	`'HedGroup'`	The copied group.

Source code in hed/models/hed_group.py

def copy(self) -> "HedGroup":
    """ Return a deep copy of this group.

    The parent link is cleared while copying so that the parent is not copied
    along with the group; the returned copy has no parent.  The parent link of
    this group is always restored, even if the deep copy raises.

    Returns:
        HedGroup: The copied group.

    """
    saved_parent = self._parent
    self._parent = None
    try:
        return copy.deepcopy(self)
    finally:
        # Restore the link on both the success and the failure path.
        self._parent = saved_parent

find_def_tags ¶

find_def_tags(recursive=False, include_groups=3) -> list

Find def and def-expand tags.

Parameters:

Name	Type	Description	Default
`recursive`	`bool`	If true, also check subgroups.	`False`
`include_groups`	`(int, 0, 1, 2, 3)`	Options for return values. If 0: Return only def and def-expand tags. If 1: Return only def tags and def-expand groups. If 2: Return only groups containing defs, or def-expand groups. If 3 or any other value: Return all 3 as a tuple.	`3`

Returns: list: A list of tuples. The contents depend on the values of the include_group.

Source code in hed/models/hed_group.py

def find_def_tags(self, recursive=False, include_groups=3) -> list:
    """ Find def and def-expand tags.

    Parameters:
        recursive (bool): If true, also check subgroups.
        include_groups (int, 0, 1, 2, 3): Options for return values.
            If 0: Return only def and def-expand tags.
            If 1: Return only def tags and def-expand groups.
            If 2: Return only groups containing defs, or def-expand groups.
            If 3 or any other value: Return all 3 as a tuple.
    Returns:
        list: A list whose contents depend on the value of include_groups.
    """
    source_groups = self.get_all_groups() if recursive else [self]
    matches = []
    for group in source_groups:
        matches += self._get_def_tags_from_group(group)

    if include_groups in (0, 1, 2):
        return [match[include_groups] for match in matches]
    return matches

find_exact_tags ¶

find_exact_tags(
    exact_tags, recursive=False, include_groups=1
) -> list

Find the given tags. This will only find complete matches, any extension or value must also match.

Parameters:

Name	Type	Description	Default
`exact_tags`	`list of HedTag`	A container of tags to locate.	required
`recursive`	`bool`	If true, also check subgroups.	`False`
`include_groups`	`int`	0, 1 or 2. If 0: Return only tags. If 1: Return only groups. If 2 or any other value: Return both as (tag, group) tuples.	`1`

Returns: list: A list of tuples. The contents depend on the values of the include_group.

Source code in hed/models/hed_group.py

def find_exact_tags(self, exact_tags, recursive=False, include_groups=1) -> list:
    """  Find the given tags.  This will only find complete matches, any extension or value must also match.

    Parameters:
        exact_tags (list of HedTag): A container of tags to locate.
        recursive (bool): If true, also check subgroups.
        include_groups (int): 0, 1 or 2.
            If 0: Return only tags
            If 1: Return only groups
            If 2 or any other value: Return both, as (tag, group) tuples
    Returns:
        list: A list whose contents depend on the value of include_groups.
    """
    candidates = self.get_all_tags() if recursive else self.tags()
    matches = [(tag, tag._parent) for tag in candidates if tag in exact_tags]

    if include_groups in (0, 1):
        return [match[include_groups] for match in matches]
    return matches

find_placeholder_tag ¶

find_placeholder_tag() -> Union[HedTag, None]

Return a placeholder tag, if present in this group.

Returns:

Type	Description
`Union[HedTag, None]`	Union[HedTag, None]: The placeholder tag if found.

Notes

Assumes a valid HedString with no erroneous "#" characters.

Source code in hed/models/hed_group.py

def find_placeholder_tag(self) -> Union[HedTag, None]:
    """ Return the first placeholder tag, if present in this group or its descendants.

    Returns:
        Union[HedTag, None]: The placeholder tag if found.

    Notes:
        - Assumes a valid HedString with no erroneous "#" characters.
    """
    return next((tag for tag in self.get_all_tags() if tag.is_placeholder()), None)

find_tags ¶

find_tags(
    search_tags, recursive=False, include_groups=2
) -> list

Find the base tags and their containing groups. This searches by short_base_tag, ignoring any ancestors or extensions/values.

Parameters:

Name	Type	Description	Default
`search_tags`	`container`	A container of short_base_tags to locate.	required
`recursive`	`bool`	If true, also check subgroups.	`False`
`include_groups`	`(0, 1 or 2)`	Specify return values. If 0: return a list of the HedTags. If 1: return a list of the HedGroups containing the HedTags. If 2: return a list of tuples (HedTag, HedGroup) for the found tags.	`2`

Returns:

Name	Type	Description
`list`	`list`	The contents of the list depends on the value of include_groups.

Source code in hed/models/hed_group.py

def find_tags(self, search_tags, recursive=False, include_groups=2) -> list:
    """ Locate tags by short_base_tag, together with the groups that hold them.

    The comparison is case-insensitive and looks only at short_base_tag, so ancestors
    and extensions/values play no part in the match.

    Parameters:
        search_tags (container):  A container of short_base_tags to locate.
        recursive (bool): If true, also check subgroups.
        include_groups (0, 1 or 2): Specify return values.
            If 0: return a list of the HedTags.
            If 1: return a list of the HedGroups containing the HedTags.
            If 2: return a list of tuples (HedTag, HedGroup) for the found tags.

    Returns:
        list: The contents of the list depends on the value of include_groups.
    """
    candidates = self.get_all_tags() if recursive else self.tags()
    wanted = {name.casefold() for name in search_tags}
    matches = [(candidate, candidate._parent) for candidate in candidates
               if candidate.short_base_tag.casefold() in wanted]

    # 0 selects the tag and 1 selects the parent group from each (tag, group) pair.
    if include_groups in (0, 1):
        return [pair[include_groups] for pair in matches]
    return matches

find_tags_with_term ¶

find_tags_with_term(
    term, recursive=False, include_groups=2
) -> list

Find any tags that contain the given term.

Note: This can only find identified tags.

Parameters:

Name	Type	Description	Default
`term`	`str`	A single term to search for.	required
`recursive`	`bool`	If true, recursively check subgroups.	`False`
`include_groups`	`(0, 1 or 2)`	Controls return values If 0: Return only tags. If 1: Return only groups. If 2 or any other value: Return both.	`2`

Returns:

Name	Type	Description
`list`	`list`	The matches; the contents of the list depend on the value of include_groups.

Source code in hed/models/hed_group.py

def find_tags_with_term(self, term, recursive=False, include_groups=2) -> list:
    """  Find the tags whose tag_terms contain the given term.

        Note: This can only find identified tags.

    Parameters:
        term (str): A single term to search for (casefolded before comparison).
        recursive (bool): If true, recursively check subgroups.
        include_groups (0, 1 or 2): Controls return values
            If 0: Return only tags.
            If 1: Return only groups.
            If 2 or any other value: Return both.

    Returns:
        list: The matches; tags, parent groups, or (tag, group) tuples depending on include_groups.
    """
    candidates = self.get_all_tags() if recursive else self.tags()
    needle = term.casefold()
    matches = [(candidate, candidate._parent) for candidate in candidates
               if needle in candidate.tag_terms]

    # 0 selects the tag and 1 selects the parent group from each (tag, group) pair.
    if include_groups in (0, 1):
        return [pair[include_groups] for pair in matches]
    return matches

find_wildcard_tags ¶

find_wildcard_tags(
    search_tags, recursive=False, include_groups=2
) -> list

Find the tags and their containing groups.

This searches tag.short_tag.casefold(), with an implicit wildcard on the end.

e.g. "Eve" will find Event, but not Sensory-event.

Parameters:

Name	Type	Description	Default
`search_tags`	`container`	A container of the starts of short tags to search.	required
`recursive`	`bool`	If True, also check subgroups.	`False`
`include_groups`	`(0, 1 or 2)`	Specify return values. If 0: return a list of the HedTags. If 1: return a list of the HedGroups containing the HedTags. If 2: return a list of tuples (HedTag, HedGroup) for the found tags.	`2`

Returns:

Name	Type	Description
`list`	`list`	The contents of the list depends on the value of include_groups.

Source code in hed/models/hed_group.py

def find_wildcard_tags(self, search_tags, recursive=False, include_groups=2) -> list:
    """ Find tags by short-tag prefix, together with their containing groups.

        Each entry of search_tags is compared against tag.short_tag.casefold() as a prefix,
        i.e. with an implicit wildcard on the end.

        e.g. "Eve" will find Event, but not Sensory-event.

    Parameters:
        search_tags (container): A container of the starts of short tags to search.
        recursive (bool): If True, also check subgroups.
        include_groups (0, 1 or 2): Specify return values.
            If 0: return a list of the HedTags.
            If 1: return a list of the HedGroups containing the HedTags.
            If 2: return a list of tuples (HedTag, HedGroup) for the found tags.

    Returns:
        list: The contents of the list depends on the value of include_groups.
    """
    candidates = self.get_all_tags() if recursive else self.tags()
    prefixes = {prefix.casefold() for prefix in search_tags}

    # any() stops at the first matching prefix, so a tag is reported at most once.
    matches = [(candidate, candidate._parent) for candidate in candidates
               if any(candidate.short_tag.casefold().startswith(prefix) for prefix in prefixes)]

    # 0 selects the tag and 1 selects the parent group from each (tag, group) pair.
    if include_groups in (0, 1):
        return [pair[include_groups] for pair in matches]
    return matches

get_all_groups ¶

get_all_groups(also_return_depth=False) -> list

Return HedGroups, including descendants and self.

Parameters:

Name	Type	Description	Default
`also_return_depth`	`bool`	If True, return tuples (group, depth) rather than just groups.	`False`

Returns:

Name	Type	Description
`list`	`list`	The list of all HedGroups in this group, including descendants and self.

Source code in hed/models/hed_group.py

def get_all_groups(self, also_return_depth=False) -> list:
    """ Collect this group and every descendant HedGroup in depth-first, parent-before-child order.

    Parameters:
        also_return_depth (bool): If True, return tuples (group, depth) rather than just groups.
                                  The second element is the value produced by self._check_in_group.

    Returns:
        list: The list of all HedGroups in this group, including descendants and self.

    """
    pending = [self]
    collected = []

    while pending:
        node = pending.pop()
        if not isinstance(node, HedGroup):
            continue
        collected.append(node)
        # Push children reversed so the first child is popped (and therefore visited) first.
        pending.extend(reversed(list(node.children)))

    if not also_return_depth:
        return collected

    top_groups = self.groups()
    return [(group, self._check_in_group(group, top_groups)) for group in collected]

get_all_tags ¶

get_all_tags() -> list

Return HedTags, including descendants.

Returns:

Name	Type	Description
`list`	`list`	A list of all the tags in this group including descendants.

Source code in hed/models/hed_group.py

def get_all_tags(self) -> list:
    """ Collect every non-group node (HedTag) below this group, descendants included.

    Returns:
        list:  A list of all the tags in this group including descendants, in depth-first order.

    """
    pending = [self]
    collected = []

    while pending:
        node = pending.pop()
        if isinstance(node, HedGroup):
            # Push children reversed so the first child is popped (and therefore visited) first.
            pending.extend(reversed(list(node.children)))
        else:
            collected.append(node)
    return collected

get_as_form ¶

get_as_form(tag_attribute) -> str

Get the string corresponding to the specified form.

Parameters:

Name	Type	Description	Default
`tag_attribute`	`str`	The hed_tag property to use to construct the string (usually short_tag or long_tag).	required

Returns:

Name	Type	Description
`str`	`str`	The constructed string after transformation.

Source code in hed/models/hed_group.py

def get_as_form(self, tag_attribute) -> str:
    """ Get the string corresponding to the specified form.

    Each HedTag child contributes the value of its `tag_attribute` attribute; each other child
    (a nested group) contributes its own recursive get_as_form() result. The pieces are joined
    with commas, and the result is wrapped in parentheses when self.is_group is true.

    Parameters:
        tag_attribute (str): The hed_tag property to use to construct the string (usually short_tag or long_tag).

    Returns:
        str: The constructed string after transformation.
    """
    # getattr() is the supported way to read an attribute by name; calling the
    # __getattribute__ dunder directly skips any __getattr__ fallback.
    rendered = [
        getattr(child, tag_attribute) if isinstance(child, HedTag) else child.get_as_form(tag_attribute)
        for child in self.children
    ]
    joined = ",".join(rendered)
    if self.is_group:
        return f"({joined})"
    return joined

get_as_indented ¶

get_as_indented(tag_attribute='short_tag')

Return the string as a multiline indented format.

Parameters:

Name	Type	Description	Default
`tag_attribute`	`str`	The hed_tag property to use to construct the string (usually short_tag or long_tag).	`'short_tag'`

Returns:

Name	Type	Description
`formatted_hed`	`str`	The indented string.

Source code in hed/models/hed_group.py

def get_as_indented(self, tag_attribute="short_tag"):
    """Render a sorted copy of this group as a multiline, tab-indented string.

    Every "(" starts a new line indented with one tab per currently open group. A ")" that
    directly follows another ")" is placed on its own line at the indentation of its
    matching "("; any other ")" stays on the current line.

    Parameters:
        tag_attribute (str): The hed_tag property to use to construct the string (usually short_tag or long_tag).

    Returns:
        formatted_hed (str): The indented string.
    """
    flat = self.sorted().get_as_form(tag_attribute)

    pieces = []
    open_levels = []   # indentation level recorded for each still-open "("
    depth = 0
    previous = ''
    for char in flat:
        if char == "(":
            open_levels.append(depth)
            pieces.append("\n" + "\t" * depth + char)
            depth += 1
        elif char == ")":
            depth = open_levels.pop()
            pieces.append(("\n" + "\t" * depth + char) if previous == ")" else char)
        else:
            pieces.append(char)
        previous = char

    return "".join(pieces)

get_as_long ¶

get_as_long() -> str

Return this HedGroup as a long tag string.

Returns:

Name	Type	Description
`str`	`str`	The group as a string with all tags as long tags.

Source code in hed/models/hed_group.py

def get_as_long(self) -> str:
    """ Render this HedGroup using the long_tag form of every tag.

    Returns:
        str: The result of get_as_form("long_tag").

    """
    long_form = self.get_as_form("long_tag")
    return long_form

get_as_short ¶

get_as_short() -> str

Return this HedGroup as a short tag string.

Returns:

Name	Type	Description
`str`	`str`	The group as a string with all tags as short tags.

Source code in hed/models/hed_group.py

def get_as_short(self) -> str:
    """ Render this HedGroup using the short_tag form of every tag.

    Returns:
        str: The result of get_as_form("short_tag").

    """
    short_form = self.get_as_form("short_tag")
    return short_form

get_first_group ¶

get_first_group() -> HedGroup

Return the first group in this HED string or group.

Useful for things like Def-expand where they only have a single group.

Raises an IndexError if there are no groups.

Returns:

Name	Type	Description
`HedGroup`	`HedGroup`	The first group.

Source code in hed/models/hed_group.py

def get_first_group(self) -> HedGroup:
    """ Return the first direct child group of this HED string or group.

        Useful for things like Def-expand where they only have a single group.

        Raises an IndexError if there are no groups (the list from groups() is empty).

    Returns:
        HedGroup: The first group.

    """
    child_groups = self.groups()
    return child_groups[0]

get_original_hed_string ¶

get_original_hed_string() -> str

Get the original HED string.

Returns:

Name	Type	Description
`str`	`str`	The original string with no modification.

Source code in hed/models/hed_group.py

def get_original_hed_string(self) -> str:
    """ Return the slice of the source HED string that this object spans.

    Returns:
        str: The text of _hed_string between _startpos and _endpos, with no modification.

    """
    span = slice(self._startpos, self._endpos)
    return self._hed_string[span]

groups ¶

groups() -> list

Return the direct child groups of this group.

Returns:

Name	Type	Description
`list`	`list`	All groups directly in this group, filtering out HedTag children.

Source code in hed/models/hed_group.py

def groups(self) -> list:
    """ List the HedGroup instances that are direct children of this group.

    Returns:
        list: The direct children that are HedGroups, in child order; HedTag children are left out.

    """
    return [child for child in self.children if isinstance(child, HedGroup)]

lower ¶

lower()

Convenience function, equivalent to str(self).lower().

Source code in hed/models/hed_group.py

def lower(self):
    """ Return the lowercased string form of this object; same as str(self).lower(). """
    as_text = str(self)
    return as_text.lower()

remove ¶

remove(
    items_to_remove: Iterable[Union[HedTag, "HedGroup"]],
)

Remove any tags/groups in items_to_remove.

Parameters:

Name	Type	Description	Default
`items_to_remove`	`list`	List of HedGroups and/or HedTags to remove by identity.	required

Notes

Any groups that become empty will also be pruned.
If you pass a child and parent group, the child will also be removed from the parent.

Source code in hed/models/hed_group.py

def remove(self, items_to_remove: Iterable[Union[HedTag, 'HedGroup']]):
    """ Remove any tags/groups in items_to_remove.

    Each item is detached from the children list of its own `_parent`, which need not be
    `self`. Parents left with no children are then removed by a recursive call.

    Parameters:
        items_to_remove (list):  List of HedGroups and/or HedTags to remove by identity.

    Notes:
        - Any groups that become empty will also be pruned.
        - If you pass a child and parent group, the child will also be removed from the parent.
        - Every item must currently have a `_parent`; it is set to None once removal is done.
    """
    # Groups (other than self) that end up with no children and must be pruned afterwards.
    empty_groups = []
    # Filter out duplicates: keying by id() keeps one entry per distinct object.
    items_to_remove = {id(item): item for item in items_to_remove}.values()

    for item in items_to_remove:
        group = item._parent
        # If _original_children still aliases the live list, snapshot it before the first mutation.
        if group._original_children is group.children:
            group._original_children = group.children.copy()

        # NOTE(review): list.remove() matches by ==, not identity — confirm this agrees with the
        # "remove by identity" contract if the item types define __eq__.
        group.children.remove(item)
        # self is never pruned, even when it becomes empty.
        if not group.children and group is not self:
            empty_groups.append(group)

    # Recurse so that pruning can cascade up through newly emptied ancestors.
    if empty_groups:
        self.remove(empty_groups)

    # Do this last to avoid confusing typing
    for item in items_to_remove:
        item._parent = None

replace `staticmethod` ¶

replace(item_to_replace, new_contents)

Replace an existing tag or group.

Note: This is a static method that relies on the parent attribute of item_to_replace.

Parameters:

Name	Type	Description	Default
`item_to_replace`	`HedTag or HedGroup`	The item to replace must exist or this will raise an error.	required
`new_contents`	`HedTag or HedGroup`	Replacement contents.	required

:raises KeyError: - item_to_replace does not exist.

:raises AttributeError: - item_to_replace has no parent set.

Source code in hed/models/hed_group.py

@staticmethod
def replace(item_to_replace, new_contents):
    """ Swap an existing tag or group for new contents inside its parent.

        Note: This is a static method that relies on the parent attribute of item_to_replace;
        the actual replacement is delegated to the parent's _replace method.

    Parameters:
        item_to_replace (HedTag or HedGroup): The item to replace must exist or this will raise an error.
        new_contents (HedTag or HedGroup): Replacement contents.

    :raises KeyError:
        - item_to_replace does not exist.

    :raises AttributeError:
        - item_to_replace has no parent set.
    """
    item_to_replace._parent._replace(item_to_replace=item_to_replace, new_contents=new_contents)

sort ¶

sort()

Sort the tags and groups in this HedString in a consistent order.

Source code in hed/models/hed_group.py

def sort(self):
    """ Put the tags and groups of this object into a consistent order, in place.

    Delegates to _sorted with update_self=True and returns nothing.
    """
    self._sorted(update_self=True)

sorted ¶

sorted() -> 'HedGroup'

Return a sorted copy of this HED group

Returns:

Name	Type	Description
`sorted_copy`	`HedGroup`	The sorted copy.

Source code in hed/models/hed_group.py

def sorted(self) -> "HedGroup":
    """ Return a sorted copy of this HED group, leaving this object untouched.

    The copy comes from self.copy() and is then sorted in place via _sorted(update_self=True).

    Returns:
        sorted_copy (HedGroup): The sorted copy.
    """
    duplicate = self.copy()
    duplicate._sorted(update_self=True)
    return duplicate

tags ¶

tags() -> list

Return the direct child tags of this group.

Returns:

Name	Type	Description
`list`	`list`	All tags directly in this group, filtering out HedGroup children.

Source code in hed/models/hed_group.py

def tags(self) -> list:
    """ List the HedTag instances that are direct children of this group.

    Returns:
        list: The direct children that are HedTags, in child order; HedGroup children are left out.

    """
    return [child for child in self.children if isinstance(child, HedTag)]

Sidecar¶

Sidecar ¶

Contents of a JSON file or JSON files.

Source code in hed/models/sidecar.py

class Sidecar:
    """ Contents of a JSON file or JSON files.

    The parsed JSON of all given files is merged into one dictionary (`loaded_dict`), keyed by
    column name. ColumnMetadata objects are created on demand from that dictionary.
    """

    def __init__(self, files, name=None):
        """ Construct a Sidecar object representing a JSON file.

        Parameters:
            files (str or FileLike or list): A string or file-like object representing a JSON file, or a list of such.
            name (str or None): Optional name identifying this sidecar, generally a filename.
        """
        self.name = name
        self.loaded_dict = self.load_sidecar_files(files)
        # Definitions are extracted lazily by get_def_dict once a schema is available.
        self._def_dict = None
        self._extract_definition_issues = []

    def __iter__(self):
        """ An iterator to go over the individual column metadata.

        Returns:
            iterator: An iterator over the column metadata values.

        """
        return iter(self.column_data.values())

    def __getitem__(self, column_name):
        """ Return the ColumnMetadata for one column.

        Parameters:
            column_name (str): The name of the column to look up in the loaded sidecar.

        Returns:
            ColumnMetadata or None: Metadata backed by this sidecar's loaded dictionary,
                                    or None if the column is not in the sidecar.
        """
        if column_name not in self.loaded_dict:
            return None
        # Pass the source dictionary so the result matches what column_data builds for this column.
        return ColumnMetadata(name=column_name, source=self.loaded_dict)

    @property
    def all_hed_columns(self):
        """ Return all columns that are HED compatible.

            Returns:
                column_refs(list): A list of all valid HED columns by name.
        """
        possible_column_references = [column.column_name for column in self if column.column_type != ColumnType.Ignore]

        return possible_column_references

    @property
    def def_dict(self) -> 'DefinitionDict':
        """ Definitions from this sidecar.

            Generally you should instead call get_def_dict to get the relevant definitions.
            This is None until get_def_dict has extracted the definitions.

        Returns:
            DefinitionDict: The definitions for this sidecar.
        """
        return self._def_dict

    @property
    def column_data(self):
        """ Generate the ColumnMetadata for this sidecar.

        A fresh dictionary of fresh ColumnMetadata objects is built on every access.

        Returns:
            dict({str:ColumnMetadata}): The column metadata defined by this sidecar.
        """
        return {col_name: ColumnMetadata(name=col_name, source=self.loaded_dict) for col_name in self.loaded_dict}

    def get_def_dict(self, hed_schema, extra_def_dicts=None) -> 'DefinitionDict':
        """ Return the definition dict for this sidecar.

        The sidecar's own definitions are extracted on the first call that supplies a schema
        and are reused afterwards.

        Parameters:
            hed_schema (HedSchema): Identifies tags to find definitions.
            extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list.

        Returns:
            DefinitionDict:  A single definition dict representing all the data (and extra def dicts).
        """
        if self._def_dict is None and hed_schema:
            self._def_dict = self.extract_definitions(hed_schema)
        def_dicts = []
        if self.def_dict:
            def_dicts.append(self.def_dict)
        if extra_def_dicts:
            if not isinstance(extra_def_dicts, list):
                extra_def_dicts = [extra_def_dicts]
            def_dicts += extra_def_dicts
        return DefinitionDict(def_dicts)

    def save_as_json(self, save_filename):
        """ Save column metadata to a JSON file.

        Parameters:
            save_filename (str): Path to save file.

        """
        with open(save_filename, "w") as fp:
            json.dump(self.loaded_dict, fp, indent=4)

    def get_as_json_string(self) -> str:
        """ Return this sidecar's column metadata as a string.

        Returns:
            str: The json string representing this sidecar.

        """
        return json.dumps(self.loaded_dict, indent=4)

    def load_sidecar_file(self, file):
        """ Load column metadata from a given json file.

        If the sidecar has no name yet and a filename is given, the filename becomes the name.

        Parameters:
            file (str or FileLike): If a string, this is a filename. Otherwise, it will be parsed as a file-like.

        Returns:
            dict: The parsed JSON, or an empty dict if file is falsy.

        :raises HedFileError:
            - If the file was not found or could not be parsed into JSON.
        """
        if not file:
            return {}
        elif isinstance(file, str):
            if not self.name:
                self.name = file
            try:
                with open(file, "r") as fp:
                    return self._load_json_file(fp)
            except OSError as e:
                raise HedFileError(HedExceptions.FILE_NOT_FOUND, e.strerror, file) from e
        else:
            return self._load_json_file(file)

    def load_sidecar_files(self, files):
        """ Load json from a given file or list.

        Files are merged in order with dict.update, so a later file replaces any
        top-level key that an earlier file also defines.

        Parameters:
            files (str or FileLike or list): A string or file-like object representing a JSON file, or a list of such.

        Returns:
            dict: The merged contents of all files, or an empty dict if files is falsy.

        :raises HedFileError:
            - If the file was not found or could not be parsed into JSON.

        """
        if not files:
            return {}
        if not isinstance(files, list):
            files = [files]

        merged_dict = {}
        for file in files:
            loaded_json = self.load_sidecar_file(file)
            merged_dict.update(loaded_json)
        return merged_dict

    def validate(self, hed_schema, extra_def_dicts=None, name=None, error_handler=None) -> list[dict]:
        """Create a SidecarValidator and validate this sidecar with the schema.

        Parameters:
            hed_schema (HedSchema): The schema handed to the SidecarValidator.
            extra_def_dicts (list or DefinitionDict): Extra def dicts in addition to sidecar.
            name (str): The name to report this sidecar as.
            error_handler (ErrorHandler): Error context to use.  Creates a new one if None.

        Returns:
            list[dict]: A list of issues associated with each level in the HED string.
        """
        # Imported here rather than at module level — presumably to avoid a circular import; confirm.
        from hed.validator.sidecar_validator import SidecarValidator

        if error_handler is None:
            error_handler = ErrorHandler()

        validator = SidecarValidator(hed_schema)
        issues = validator.validate(self, extra_def_dicts, name, error_handler=error_handler)
        return issues

    def _load_json_file(self, fp):
        """ Load the raw json of a given file.

        Parameters:
            fp (File-like): The JSON source stream.

        Returns:
            The object produced by json.load(fp).

        :raises HedFileError:
            - If the file cannot be parsed.
        """
        try:
            return json.load(fp)
        except (json.decoder.JSONDecodeError, AttributeError) as e:
            # AttributeError covers objects that lack the read() method json.load needs.
            raise HedFileError(HedExceptions.CANNOT_PARSE_JSON, str(e), self.name) from e

    def extract_definitions(self, hed_schema, error_handler=None) -> 'DefinitionDict':
        """ Gather and validate definitions in metadata.

        Issues found while checking are stored on self._extract_definition_issues, which is
        reset at the start of every call.

        Parameters:
            hed_schema (HedSchema): The schema used to identify tags.
            error_handler (ErrorHandler or None): The error handler to use for context, uses a default one if None.

        Returns:
            DefinitionDict: Contains all the definitions located in the sidecar.

        """
        if error_handler is None:
            error_handler = ErrorHandler()
        def_dict = DefinitionDict()

        self._extract_definition_issues = []
        if hed_schema:
            for column_data in self:
                error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_data.column_name)
                hed_strings = column_data.get_hed_strings()
                for key_name, hed_string in hed_strings.items():
                    hed_string_obj = HedString(hed_string, hed_schema)
                    # The key-name context is only pushed when the column has more than one HED string.
                    if len(hed_strings) > 1:
                        error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name)
                    error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj)
                    self._extract_definition_issues += def_dict.check_for_definitions(hed_string_obj, error_handler)
                    # Pop in reverse order of the pushes above to keep the context stack balanced.
                    error_handler.pop_error_context()
                    if len(hed_strings) > 1:
                        error_handler.pop_error_context()

                error_handler.pop_error_context()

        return def_dict

    def get_column_refs(self):
        """ Returns a list of column refs found in this sidecar.

            A column ref is a name in curly braces, e.g. {column_name}, made of letters,
            digits, underscores and hyphens. This does not validate.

        Returns:
            column_refs(list): A list of unique column refs found.
        """
        found_vals = set()
        for column_data in self:
            if column_data.column_type == ColumnType.Ignore:
                continue
            # Relies on the .str accessor — assumes get_hed_strings() returns a pandas Series; confirm.
            hed_strings = column_data.get_hed_strings()
            matches = hed_strings.str.findall(r"\{([a-z_\-0-9]+)\}", re.IGNORECASE)
            u_vals = [match for sublist in matches for match in sublist]

            found_vals.update(u_vals)

        return list(found_vals)

all_hed_columns `property` ¶

all_hed_columns

Return all columns that are HED compatible.

Returns:

Name	Type	Description
`column_refs`	`list`	A list of all valid HED columns by name.

column_data `property` ¶

column_data

Generate the ColumnMetadata for this sidecar.

Returns:

Name	Type	Description
`dict`	`{str: ColumnMetadata}`	The column metadata defined by this sidecar.

def_dict `property` ¶

def_dict: DefinitionDict

Definitions from this sidecar.

Generally you should instead call get_def_dict to get the relevant definitions.

Returns:

Name	Type	Description
`DefinitionDict`	`DefinitionDict`	The definitions for this sidecar.

extract_definitions ¶

extract_definitions(
    hed_schema, error_handler=None
) -> DefinitionDict

Gather and validate definitions in metadata.

Parameters:

Name	Type	Description	Default
`hed_schema`	`HedSchema`	The schema used to identify tags.	required
`error_handler`	`ErrorHandler or None`	The error handler to use for context, uses a default one if None.	`None`

Returns:

Name	Type	Description
`DefinitionDict`	`DefinitionDict`	Contains all the definitions located in the sidecar.

Source code in hed/models/sidecar.py

def extract_definitions(self, hed_schema, error_handler=None) -> 'DefinitionDict':
    """ Collect the definitions found in this sidecar's HED strings.

    Issues reported while checking are accumulated on self._extract_definition_issues,
    which is reset at the start of every call.

    Parameters:
        hed_schema (HedSchema): The schema used to identify tags. If falsy, nothing is scanned.
        error_handler (ErrorHandler or None): The error handler to use for context, uses a default one if None.

    Returns:
        DefinitionDict: Contains all the definitions located in the sidecar.

    """
    error_handler = ErrorHandler() if error_handler is None else error_handler
    collected = DefinitionDict()

    self._extract_definition_issues = []
    if not hed_schema:
        return collected

    for column in self:
        error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column.column_name)
        strings_by_key = column.get_hed_strings()
        # The key-name context is only used when the column has more than one HED string.
        needs_key_context = len(strings_by_key) > 1
        for key_name, raw_string in strings_by_key.items():
            parsed = HedString(raw_string, hed_schema)
            if needs_key_context:
                error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name)
            error_handler.push_error_context(ErrorContext.HED_STRING, parsed)
            self._extract_definition_issues += collected.check_for_definitions(parsed, error_handler)
            # Pop in reverse order of the pushes above to keep the context stack balanced.
            error_handler.pop_error_context()
            if needs_key_context:
                error_handler.pop_error_context()

        error_handler.pop_error_context()

    return collected

get_as_json_string ¶

get_as_json_string() -> str

Return this sidecar's column metadata as a string.

Returns:

Name	Type	Description
`str`	`str`	The json string representing this sidecar.

Source code in hed/models/sidecar.py

def get_as_json_string(self) -> str:
    """ Serialize this sidecar's loaded dictionary as JSON text.

    Returns:
        str: The json string representing this sidecar, indented by 4 spaces.

    """
    serialized = json.dumps(self.loaded_dict, indent=4)
    return serialized

get_column_refs ¶

get_column_refs()

Returns a list of column refs found in this sidecar.

This does not validate

Returns:

Name	Type	Description
`column_refs`	`list`	A list of unique column refs found.

Source code in hed/models/sidecar.py

def get_column_refs(self):
    """ Collect the distinct column references used in this sidecar's HED strings.

        A column ref is a name in curly braces, e.g. {column_name}, made of letters,
        digits, underscores and hyphens. This does not validate.

    Returns:
        column_refs(list): A list of unique column refs found.
    """
    unique_refs = set()
    for column in self:
        if column.column_type == ColumnType.Ignore:
            continue
        # Relies on the .str accessor — assumes get_hed_strings() returns a pandas Series; confirm.
        per_string_matches = column.get_hed_strings().str.findall(r"\{([a-z_\-0-9]+)\}", re.IGNORECASE)
        for found in per_string_matches:
            unique_refs.update(found)

    return list(unique_refs)

get_def_dict ¶

get_def_dict(
    hed_schema, extra_def_dicts=None
) -> DefinitionDict

Return the definition dict for this sidecar.

Parameters:

Name	Type	Description	Default
`hed_schema`	`HedSchema`	Identifies tags to find definitions.	required
`extra_def_dicts`	`list, DefinitionDict, or None`	Extra dicts to add to the list.	`None`

Returns:

Name	Type	Description
`DefinitionDict`	`DefinitionDict`	A single definition dict representing all the data (and extra def dicts).

Source code in hed/models/sidecar.py

def get_def_dict(self, hed_schema, extra_def_dicts=None) -> 'DefinitionDict':
    """ Build one DefinitionDict from this sidecar's definitions plus any extras.

    The sidecar's own definitions are extracted on the first call that supplies a schema
    and are reused afterwards.

    Parameters:
        hed_schema (HedSchema): Identifies tags to find definitions.
        extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list.

    Returns:
        DefinitionDict:  A single definition dict representing all the data (and extra def dicts).
    """
    if self._def_dict is None and hed_schema:
        self._def_dict = self.extract_definitions(hed_schema)

    own_definitions = self.def_dict
    combined = [own_definitions] if own_definitions else []
    if extra_def_dicts:
        # A single DefinitionDict is accepted and treated as a one-element list.
        extras = extra_def_dicts if isinstance(extra_def_dicts, list) else [extra_def_dicts]
        combined += extras
    return DefinitionDict(combined)

load_sidecar_file ¶

load_sidecar_file(file)

Load column metadata from a given json file.

Parameters:

Name	Type	Description	Default
`file`	`str or FileLike`	If a string, this is a filename. Otherwise, it will be parsed as a file-like.	required

:raises HedFileError: - If the file was not found or could not be parsed into JSON.

Source code in hed/models/sidecar.py

def load_sidecar_file(self, file):
    """ Read the column metadata held in one JSON source.

    If the sidecar has no name yet and a filename is given, the filename becomes the name.

    Parameters:
        file (str or FileLike): If a string, this is a filename. Otherwise, it will be parsed as a file-like.

    Returns:
        dict: The parsed JSON, or an empty dict if file is falsy.

    :raises HedFileError:
        - If the file was not found or could not be parsed into JSON.
    """
    if not file:
        return {}
    if not isinstance(file, str):
        # Anything that is not a path string is handed to the parser as an open stream.
        return self._load_json_file(file)

    if not self.name:
        self.name = file
    try:
        with open(file, "r") as handle:
            return self._load_json_file(handle)
    except OSError as e:
        raise HedFileError(HedExceptions.FILE_NOT_FOUND, e.strerror, file) from e

load_sidecar_files ¶

load_sidecar_files(files)

Load json from a given file or list.

Parameters:

Name	Type	Description	Default
`files`	`str or FileLike or list`	A string or file-like object representing a JSON file, or a list of such.	required

:raises HedFileError: - If the file was not found or could not be parsed into JSON.

Source code in hed/models/sidecar.py

def load_sidecar_files(self, files):
    """ Load and merge the JSON from one source or a list of sources.

    Sources are merged in order with dict.update, so a later source replaces any
    top-level key that an earlier source also defines.

    Parameters:
        files (str or FileLike or list): A string or file-like object representing a JSON file, or a list of such.

    Returns:
        dict: The merged contents of all sources, or an empty dict if files is falsy.

    :raises HedFileError:
        - If the file was not found or could not be parsed into JSON.

    """
    if not files:
        return {}

    sources = files if isinstance(files, list) else [files]
    combined = {}
    for source in sources:
        combined.update(self.load_sidecar_file(source))
    return combined

save_as_json ¶

save_as_json(save_filename)

Save column metadata to a JSON file.

Parameters:

Name	Type	Description	Default
`save_filename`	`str`	Path to save file.	required

Source code in hed/models/sidecar.py

def save_as_json(self, save_filename):
    """ Write the loaded sidecar dictionary to a JSON file.

    Parameters:
        save_filename (str): Path of the file to write; it is opened in write mode.

    Notes:
        - The output is indented by 4 spaces.

    """
    with open(save_filename, "w") as json_out:
        json.dump(self.loaded_dict, json_out, indent=4)

validate ¶

validate(
    hed_schema,
    extra_def_dicts=None,
    name=None,
    error_handler=None,
) -> list[dict]

Create a SidecarValidator and validate this sidecar with the schema.

Parameters:

Name	Type	Description	Default
`hed_schema`	`HedSchema`	Input data to be validated.	required
`extra_def_dicts`	`list or DefinitionDict`	Extra def dicts in addition to sidecar.	`None`
`name`	`str`	The name to report this sidecar as.	`None`
`error_handler`	`ErrorHandler`	Error context to use. Creates a new one if None.	`None`

Returns:

Type	Description
`list[dict]`	A list of issues associated with each level in the HED string.

Source code in hed/models/sidecar.py

def validate(self, hed_schema, extra_def_dicts=None, name=None, error_handler=None) -> list[dict]:
    """Validate this sidecar against a schema using a SidecarValidator.

    Parameters:
        hed_schema (HedSchema): The schema the validator is constructed with.
        extra_def_dicts (list or DefinitionDict): Extra def dicts in addition to sidecar.
        name (str): The name to report this sidecar as.
        error_handler (ErrorHandler): Error context to use.  Creates a new one if None.

    Returns:
        list[dict]: The issues returned by the validator.
    """
    # Imported here rather than at module level (presumably to avoid a circular import).
    from hed.validator.sidecar_validator import SidecarValidator

    handler = ErrorHandler() if error_handler is None else error_handler
    sidecar_validator = SidecarValidator(hed_schema)
    return sidecar_validator.validate(self, extra_def_dicts, name, error_handler=handler)

TabularInput¶

TabularInput ¶

Bases: BaseInput

A BIDS tabular file with sidecar.

Source code in hed/models/tabular_input.py

class TabularInput(BaseInput):
    """ A BIDS tabular file with sidecar. """

    # Column treated as an optional tag column when building the column mapper.
    HED_COLUMN_NAME = "HED"

    def __init__(self, file=None, sidecar=None, name=None):
        """ Constructor for the TabularInput class.

        Parameters:
            file (str or FileLike or pd.Dataframe): A tsv file to open.
            sidecar (str or Sidecar or FileLike): A Sidecar or source file/filename.
            name (str): The name to display for this file for error purposes.

        :raises HedFileError:
            - The file is blank.
            - An invalid dataframe was passed with size 0.
            - An invalid extension was provided.
            - A duplicate or empty column name appears.

        :raises OSError:
            - Cannot open the indicated file.

        :raises ValueError:
            - This file has no column names.
        """
        # Anything that is not already a Sidecar is treated as a sidecar source and loaded.
        if sidecar and not isinstance(sidecar, Sidecar):
            sidecar = Sidecar(sidecar)
        new_mapper = ColumnMapper(sidecar=sidecar, optional_tag_columns=[self.HED_COLUMN_NAME],
                                  warn_on_missing_column=True)

        self._sidecar = sidecar

        super().__init__(file, file_type=".tsv", worksheet_name=None, has_column_names=True, mapper=new_mapper,
                         name=name, allow_blank_names=False)

        # The base class may clear this flag (e.g. for a dataframe without string column names).
        if not self._has_column_names:
            raise ValueError("You are attempting to open a bids_old style file with no column headers provided.\n"
                             "This is probably not intended.")

    def reset_column_mapper(self, sidecar=None):
        """ Replace the sidecar and rebuild the column mapper.

        Parameters:
            sidecar (str or FileLike or Sidecar or None): A Sidecar, or a sidecar source that is
                loaded into a Sidecar exactly as the constructor does. None removes the sidecar.

        Notes:
            - NOTE(review): unlike the constructor, the new mapper is created without
              warn_on_missing_column=True; confirm whether that difference is intended.

        """
        # Mirror __init__: store a real Sidecar so get_def_dict/get_column_refs can call its methods.
        if sidecar and not isinstance(sidecar, Sidecar):
            sidecar = Sidecar(sidecar)
        new_mapper = ColumnMapper(sidecar=sidecar, optional_tag_columns=[self.HED_COLUMN_NAME])
        self._sidecar = sidecar

        self.reset_mapper(new_mapper)

    def get_def_dict(self, hed_schema, extra_def_dicts=None) -> 'DefinitionDict':
        """ Return the definition dict for this file.

        Parameters:
            hed_schema (HedSchema): Used to identify tags to find definitions.
            extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list.

        Returns:
            DefinitionDict:   A single definition dict representing all the data(and extra def dicts).

        Notes:
            - Delegates to the sidecar when one is attached, otherwise to the base class.
        """
        if self._sidecar:
            return self._sidecar.get_def_dict(hed_schema, extra_def_dicts)
        else:
            return super().get_def_dict(hed_schema, extra_def_dicts)

    def get_column_refs(self) -> list[str]:
        """ Return a list of column refs for this file.

            Delegates to the sidecar; returns an empty list when no sidecar is attached.

        Returns:
            list[str]: A list of unique column refs found.
        """
        if self._sidecar:
            return self._sidecar.get_column_refs()
        return []

    def get_sidecar(self) -> Union[Sidecar, None]:
        """Return the sidecar associated with this TabularInput (None if there is none)."""
        return self._sidecar

get_column_refs ¶

get_column_refs() -> list[str]

Return a list of column refs for this file.

Delegates to the attached sidecar; returns an empty list when there is no sidecar.

Returns:

Type	Description
`list[str]`	A list of unique column refs found.

Source code in hed/models/tabular_input.py

def get_column_refs(self) -> list[str]:
    """ Return the column refs reported by the attached sidecar.

    Returns:
        list[str]: The sidecar's column refs, or an empty list when no sidecar is attached.
    """
    if not self._sidecar:
        return []
    return self._sidecar.get_column_refs()

get_def_dict ¶

get_def_dict(
    hed_schema, extra_def_dicts=None
) -> "DefinitionDict"

Return the definition dict for this sidecar.

Parameters:

Name	Type	Description	Default
`hed_schema`	`HedSchema`	Used to identify tags to find definitions.	required
`extra_def_dicts`	`list, DefinitionDict, or None`	Extra dicts to add to the list.	`None`

Returns:

Name	Type	Description
`DefinitionDict`	`'DefinitionDict'`	A single definition dict representing all the data(and extra def dicts).

Source code in hed/models/tabular_input.py

def get_def_dict(self, hed_schema, extra_def_dicts=None) -> 'DefinitionDict':
    """ Return the definition dict for this file.

    Parameters:
        hed_schema (HedSchema): Used to identify tags to find definitions.
        extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list.

    Returns:
        DefinitionDict:   A single definition dict representing all the data(and extra def dicts).

    Notes:
        - The attached sidecar supplies the result when present; otherwise the base class does.
    """
    if not self._sidecar:
        return super().get_def_dict(hed_schema, extra_def_dicts)
    return self._sidecar.get_def_dict(hed_schema, extra_def_dicts)

get_sidecar ¶

get_sidecar() -> Union[Sidecar, None]

Return the sidecar associated with this TabularInput.

Source code in hed/models/tabular_input.py

def get_sidecar(self) -> Union[Sidecar, None]:
    """Return the sidecar associated with this TabularInput.

    Returns:
        Union[Sidecar, None]: The stored sidecar, or None if none is attached.
    """
    attached_sidecar = self._sidecar
    return attached_sidecar

reset_column_mapper ¶

reset_column_mapper(sidecar=None)

Change the sidecars and settings.

Parameters:

Name	Type	Description	Default
`sidecar`	`str or [str] or Sidecar or [Sidecar]`	A list of json filenames to pull sidecar info from.	`None`

Source code in hed/models/tabular_input.py

def reset_column_mapper(self, sidecar=None):
    """ Replace the sidecar and rebuild the column mapper.

    Parameters:
        sidecar (str or FileLike or Sidecar or None): A Sidecar, or a sidecar source that is
            loaded into a Sidecar in the same way the TabularInput constructor does.
            None removes the sidecar.

    Notes:
        - NOTE(review): unlike the constructor, the new mapper is created without
          warn_on_missing_column=True; confirm whether that difference is intended.

    """
    # Store a real Sidecar: get_def_dict/get_column_refs call methods on self._sidecar,
    # which would fail if a raw filename were kept here.
    if sidecar and not isinstance(sidecar, Sidecar):
        sidecar = Sidecar(sidecar)
    new_mapper = ColumnMapper(sidecar=sidecar, optional_tag_columns=[self.HED_COLUMN_NAME])
    self._sidecar = sidecar

    self.reset_mapper(new_mapper)

SpreadsheetInput¶

SpreadsheetInput ¶

Bases: BaseInput

A spreadsheet of HED tags.

Source code in hed/models/spreadsheet_input.py

class SpreadsheetInput(BaseInput):
    """ A spreadsheet of HED tags. """

    def __init__(self, file=None, file_type=None, worksheet_name=None, tag_columns=None,
                 has_column_names=True, column_prefix_dictionary=None,
                 name=None):
        """Constructor for the SpreadsheetInput class.

        Parameters:
            file (str or file like): An xlsx/tsv file to open or a File object.
            file_type (str or None): ".xlsx" for Excel, ".tsv" or ".txt" for tab-separated data.
            worksheet_name (str or None): The name of the Excel workbook worksheet that contains the HED tags.
                Not applicable to tsv files. If omitted for Excel, the first worksheet is assumed.
            tag_columns (list): A list of ints or strs containing the columns that contain the HED tags.
                If ints then column numbers with [1] indicating only the second column has tags.
            has_column_names (bool): True if the first line of the file holds the column names.
            column_prefix_dictionary (dict or None): Dictionary with keys that are column numbers/names and
                values are HED tag prefixes to prepend to the tags in that column before processing.
            name (str or None): The name used to report this file.

        Notes:
            - If file is a string, file_type is derived from file and this parameter is ignored.
            - column_prefix_dictionary may be deprecated/renamed.  These are no longer prefixes,
              but rather converted to value columns.
              e.g. {"key": "Description", 1: "Label/"} will turn into value columns as
              {"key": "Description/#", 1: "Label/#"}
              It will be a validation issue if column 1 is called "key" in the above example.
              This means it no longer accepts anything but the value portion only in the columns.

        :raises HedFileError:
            - The file is blank.
            - An invalid dataframe was passed with size 0.
            - An invalid extension was provided.
            - A duplicate or empty column name appears.
            - Cannot open the indicated file.
            - The specified worksheet name does not exist.
        """
        self.tag_columns = tag_columns

        # Spreadsheets do not warn about columns that are missing from the file.
        tag_mapper = ColumnMapper(
            tag_columns=tag_columns,
            column_prefix_dictionary=column_prefix_dictionary,
            warn_on_missing_column=False,
        )

        super().__init__(
            file,
            file_type=file_type,
            worksheet_name=worksheet_name,
            has_column_names=has_column_names,
            mapper=tag_mapper,
            name=name,
        )

BaseInput¶

BaseInput ¶

Superclass representing a basic columnar file.

Source code in hed/models/base_input.py

class BaseInput:
    """ Superclass representing a basic columnar file. """

    TEXT_EXTENSION = ['.tsv', '.txt']
    EXCEL_EXTENSION = ['.xlsx']

    def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=True, mapper=None, name=None,
                 allow_blank_names=True):
        """ Constructor for the BaseInput class.

        Parameters:
            file (str or file-like or pd.Dataframe): An xlsx/tsv file to open.
            file_type (str or None): ".xlsx" (Excel), ".tsv" or ".txt" (tab-separated text).
                Derived from file if file is a filename.  Ignored if pandas dataframe.
            worksheet_name (str or None): Name of Excel workbook worksheet name to use.
                (Not applicable to tsv files.)
            has_column_names (bool): True if file has column names.
                This value is ignored if you pass in a pandas dataframe.
            mapper (ColumnMapper or None):  Indicates which columns have HED tags.
                A default ColumnMapper is created when None.
            name (str or None): Optional field for how this file will report errors.
                Defaults to the filename when file is a string.
            allow_blank_names(bool): If True, column names can be blank

        :raises HedFileError:
            - file is blank.
            - An invalid dataframe was passed with size 0.
            - An invalid extension was provided.
            - A duplicate or empty column name appears.
            - Cannot open the indicated file.
            - The specified worksheet name does not exist.
            - If the sidecar file or tabular file had invalid format and could not be read.

         """
        mapper = ColumnMapper() if mapper is None else mapper
        self._mapper = mapper
        self._has_column_names = has_column_names
        self._name = name
        # Populated only when the data is loaded from an Excel file.
        self._loaded_workbook = None
        self._worksheet_name = worksheet_name
        self._dataframe = None

        # For filenames, the extension supplies a missing file type and the path a missing name.
        is_filename = isinstance(file, str)
        input_type = file_type
        if is_filename and file_type is None:
            input_type = os.path.splitext(file)[1]
        if is_filename and self.name is None:
            self._name = file

        self._open_dataframe_file(file, has_column_names, input_type)

        name_issues = ColumnMapper.check_for_blank_names(self.columns, allow_blank_names=allow_blank_names)
        if name_issues:
            raise HedFileError(HedExceptions.BAD_COLUMN_NAMES, "Duplicate or blank columns found. See issues.",
                               self.name, issues=name_issues)

        # Now that the data is loaded, bind the mapper to the actual columns.
        self.reset_mapper(mapper)

    def reset_mapper(self, new_mapper):
        """ Set mapper to a different view of the file.

        Parameters:
            new_mapper (ColumnMapper): A column mapper to be associated with this base input.
        """
        self._mapper = new_mapper
        if not self._mapper:
            self._mapper = ColumnMapper()

        if self._dataframe is not None and self._has_column_names:
            columns = self._dataframe.columns
            self._mapper.set_column_map(columns)

    @property
    def dataframe(self):
        """ The underlying dataframe. """
        return self._dataframe

    @property
    def dataframe_a(self) ->pd.DataFrame:
        """Return the assembled dataframe Probably a placeholder name.

        Returns:
            pd.Dataframe: the assembled dataframe"""
        return self.assemble()

    @property
    def series_a(self) ->pd.Series:
        """Return the assembled dataframe as a series.

        Returns:
            pd.Series: the assembled dataframe with columns merged.
        """

        return self.combine_dataframe(self.assemble())

    @property
    def series_filtered(self) -> Union[pd.Series, None]:
        """Return the assembled dataframe as a series, with rows that have the same onset combined.

        Returns:
            Union[pd.Series, None] the assembled dataframe with columns merged, and the rows filtered together.
        """
        if self.onsets is not None:
            return filter_series_by_onset(self.series_a, self.onsets)
        return None

    @property
    def onsets(self):
        """Return the onset column if it exists. """
        if "onset" in self.columns:
            return self._dataframe["onset"]
        return None

    @property
    def needs_sorting(self) -> bool:
        """Return True if this both has an onset column, and it needs sorting."""
        onsets = self.onsets
        if onsets is not None:
            onsets = pd.to_numeric(self.dataframe['onset'], errors='coerce')
            return not onsets.is_monotonic_increasing
        else:
            return False

    @property
    def name(self) -> str:
        """ Name of the data. """
        return self._name

    @property
    def has_column_names(self) -> bool:
        """ True if dataframe has column names. """
        return self._has_column_names

    @property
    def loaded_workbook(self):
        """ The underlying loaded workbooks. """
        return self._loaded_workbook

    @property
    def worksheet_name(self):
        """ The worksheet name. """
        return self._worksheet_name

    def convert_to_form(self, hed_schema, tag_form):
        """ Convert all tags in underlying dataframe to the specified form.

        Parameters:
            hed_schema (HedSchema): The schema to use to convert tags.
            tag_form (str): HedTag property to convert tags to.
                Most cases should use convert_to_short or convert_to_long below.

        Notes:
            - Only the tag columns reported by the column mapper are passed for conversion.
        """
        from hed.models.df_util import convert_to_form

        tag_columns = self._mapper.get_tag_columns()
        convert_to_form(self._dataframe, hed_schema, tag_form, tag_columns)

    def convert_to_short(self, hed_schema):
        """ Convert all tags in underlying dataframe to short form.

        Parameters:
            hed_schema (HedSchema): The schema to use to convert tags.

        """
        self.convert_to_form(hed_schema, "short_tag")

    def convert_to_long(self, hed_schema):
        """ Convert all tags in underlying dataframe to long form.

        Parameters:
            hed_schema (HedSchema or None): The schema to use to convert tags.
        """
        self.convert_to_form(hed_schema, "long_tag")

    def shrink_defs(self, hed_schema):
        """ Shrinks any def-expand found in the underlying dataframe.

        Parameters:
            hed_schema (HedSchema or None): The schema to use to identify defs.

        Notes:
            - Only the tag columns reported by the column mapper are processed.
        """
        # Use the package-qualified module path, as convert_to_form does; a bare
        # "df_util" is not importable as a top-level module.
        from hed.models.df_util import shrink_defs
        shrink_defs(self._dataframe, hed_schema=hed_schema, columns=self._mapper.get_tag_columns())

    def expand_defs(self, hed_schema, def_dict):
        """ Expands any def tags found in the underlying dataframe.

        Parameters:
            hed_schema (HedSchema or None): The schema to use to identify defs.
            def_dict (DefinitionDict): The definitions to expand.

        Notes:
            - Only the tag columns reported by the column mapper are processed.
        """
        # Use the package-qualified module path, as convert_to_form does; a bare
        # "df_util" is not importable as a top-level module.
        from hed.models.df_util import expand_defs
        expand_defs(self._dataframe, hed_schema=hed_schema, def_dict=def_dict, columns=self._mapper.get_tag_columns())

    def to_excel(self, file):
        """ Output to an Excel file.

        Parameters:
            file (str or file-like): Location to save this base input.

        :raises ValueError:
            - If empty file object was passed.

        :raises OSError:
            - Cannot open the indicated file.
        """
        if not file:
            raise ValueError("Empty file name or object passed in to BaseInput.save.")

        dataframe = self._dataframe
        if self._loaded_workbook:
            old_worksheet = self.get_worksheet(self._worksheet_name)
            # Excel spreadsheets are 1 based, then add another 1 for column names if present
            adj_row_for_col_names = 1
            if self._has_column_names:
                adj_row_for_col_names += 1
            adj_for_one_based_cols = 1
            for row_number, text_file_row in dataframe.iterrows():
                for column_number, column_text in enumerate(text_file_row):
                    cell_value = dataframe.iloc[row_number, column_number]
                    old_worksheet.cell(row_number + adj_row_for_col_names,
                                       column_number + adj_for_one_based_cols).value = cell_value

            self._loaded_workbook.save(file)
        else:
            dataframe.to_excel(file, header=self._has_column_names)

    def to_csv(self, file=None):
        """ Write to file or return as a string.

        Parameters:
            file (str, file-like, or None): Location to save this file. If None, return as string.
        Returns:
            None or str:  None if file is given or the contents as a str if file is None.

        :raises OSError:
            - Cannot open the indicated file.
        """
        dataframe = self._dataframe
        csv_string_if_filename_none = dataframe.to_csv(file, sep='\t', index=False, header=self._has_column_names)
        return csv_string_if_filename_none

    @property
    def columns(self):
        """ Returns a list of the column names.

            Empty if no column names.

        Returns:
            columns(list): The column names.
        """
        columns = []
        if self._dataframe is not None and self._has_column_names:
            columns = list(self._dataframe.columns)
        return columns

    def column_metadata(self) -> dict[int, 'ColumnMeta']:
        """ Return the metadata for each column.

        Returns:
            dict[int, 'ColumnMeta']: Number/ColumnMeta pairs.
        """
        if self._mapper:
            return self._mapper._final_column_map
        return {}

    def set_cell(self, row_number, column_number, new_string_obj, tag_form="short_tag"):
        """ Replace the specified cell with transformed text.

        Parameters:
            row_number (int):    The row number of the spreadsheet to set.
            column_number (int): The column number of the spreadsheet to set.
            new_string_obj (HedString): Object with text to put in the given cell.
            tag_form (str): Version of the tags (short_tag, long_tag, base_tag, etc.)

        Notes:
             Any attribute of a HedTag that returns a string is a valid value of tag_form.

        :raises ValueError:
            - There is not a loaded dataframe.

        :raises KeyError:
            - The indicated row/column does not exist.

        :raises AttributeError:
            - The indicated tag_form is not an attribute of HedTag.
        """
        if self._dataframe is None:
            raise ValueError("No data frame loaded")

        new_text = new_string_obj.get_as_form(tag_form)
        self._dataframe.iloc[row_number, column_number] = new_text

    def get_worksheet(self, worksheet_name=None) -> Union[openpyxl.workbook.Workbook, None]:
        """ Get the requested worksheet.

        Parameters:
            worksheet_name (str or None): The name of the requested worksheet by name or the first one if None.

        Returns:
            Union[openpyxl.workbook.Workbook, None]: The workbook request.

        Notes:
            If None, returns the first worksheet.

        :raises KeyError:
            - The specified worksheet name does not exist.
        """
        if worksheet_name and self._loaded_workbook:
            # return self._loaded_workbook.get_sheet_by_name(worksheet_name)
            return self._loaded_workbook[worksheet_name]
        elif self._loaded_workbook:
            return self._loaded_workbook.worksheets[0]
        else:
            return None

    @staticmethod
    def _get_dataframe_from_worksheet(worksheet, has_headers) -> pd.DataFrame:
        """ Create a dataframe from the worksheet.

        Parameters:
            worksheet (Worksheet): The loaded worksheet to convert.
            has_headers (bool): True if this worksheet has column headers.

        Returns:
            pd.DataFrame: The converted data frame.

        """
        if has_headers:
            data = worksheet.values
            # first row is columns
            cols = next(data)
            data = list(data)
            return pd.DataFrame(data, columns=cols, dtype=str)
        else:
            return pd.DataFrame(worksheet.values, dtype=str)

    def validate(self, hed_schema, extra_def_dicts=None, name=None, error_handler=None) -> list[dict]:
        """Creates a SpreadsheetValidator and returns all issues with this file.

        Parameters:
            hed_schema (HedSchema): The schema to use for validation.
            extra_def_dicts (list of DefDict or DefDict): All definitions to use for validation.
            name (str): The name to report errors from this file as. Defaults to this input's name.
            error_handler (ErrorHandler): Error context passed through to the validator.

        Returns:
            list[dict]: The issues returned by the validator.
        """
        # Imported here rather than at module level (presumably to avoid a circular import).
        from hed.validator.spreadsheet_validator import SpreadsheetValidator

        tab_validator = SpreadsheetValidator(hed_schema)
        definitions = self._mapper.get_def_dict(hed_schema, extra_def_dicts)
        report_name = name if name else self.name
        return tab_validator.validate(self, definitions, report_name, error_handler=error_handler)

    @staticmethod
    def _dataframe_has_names(dataframe) -> bool:
        for column in dataframe.columns:
            if isinstance(column, str):
                return True
        return False

    def assemble(self, mapper=None, skip_curly_braces=False) ->pd.DataFrame:
        """ Assembles the HED strings.

        Parameters:
            mapper (ColumnMapper or None): Generally pass none here unless you want special behavior.
            skip_curly_braces (bool): If True, don't plug in curly brace values into columns.
        Returns:
            pd.Dataframe: The assembled dataframe.
        """
        if mapper is None:
            mapper = self._mapper

        all_columns = self._handle_transforms(mapper)
        if skip_curly_braces:
            return all_columns
        transformers, _ = mapper.get_transformers()
        refs = self.get_column_refs()
        column_names = list(transformers)
        return _handle_curly_braces_refs(all_columns, refs, column_names)

    def _handle_transforms(self, mapper) -> pd.DataFrame:
        """ Apply transformations to the dataframe using the provided mapper.

        Parameters:
            mapper: The column mapper object containing transformation functions.

        Returns:
            pd.DataFrame: The transformed dataframe with all transformations applied.

        Notes:
            - Handles categorical column conversions before and after transformations
            - Returns original dataframe if no transformers are defined
            - Categorical columns are temporarily converted to 'category' type for processing
              then converted back to 'str' type after transformation
        """
        transformers, need_categorical = mapper.get_transformers()
        if transformers:
            all_columns = self._dataframe
            if need_categorical:
                all_columns[need_categorical] = all_columns[need_categorical].astype('category')

            all_columns = all_columns.transform(transformers)

            if need_categorical:
                all_columns[need_categorical] = all_columns[need_categorical].astype('str')
        else:
            all_columns = self._dataframe

        return all_columns

    @staticmethod
    def combine_dataframe(dataframe) ->pd.Series:
        """ Combine all columns in the given dataframe into a single HED string series,
            skipping empty columns and columns with empty strings.

        Parameters:
            dataframe (pd.Dataframe): The dataframe to combine

        Returns:
            pd.Series: The assembled series.
        """
        dataframe = dataframe.apply(
            lambda x: ', '.join(filter(lambda e: bool(e) and e != "n/a", map(str, x))),
            axis=1
        )
        return dataframe

    def get_def_dict(self, hed_schema, extra_def_dicts=None) -> 'DefinitionDict':
        """ Return the definition dict for this file.

        Note: Baseclass implementation builds the result from extra_def_dicts only.

        Parameters:
            hed_schema (HedSchema): Identifies tags to find definitions(if needed).
            extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list.

        Returns:
            DefinitionDict:   A single definition dict representing all the data(and extra def dicts).
        """
        from hed.models.definition_dict import DefinitionDict

        combined_definitions = DefinitionDict(extra_def_dicts, hed_schema)
        return combined_definitions

    def get_column_refs(self) -> list:
        """ Return a list of column refs for this file.

            Default implementation returns empty list.

        Returns:
           list: A list of unique column refs found.
        """
        return []

    def _open_dataframe_file(self, file, has_column_names, input_type):
        """ Load data from various file types into the internal DataFrame.

        This method handles loading data from different file formats including Excel files,
        text files (TSV/CSV), and existing pandas DataFrames. It sets the _dataframe property
        and handles appropriate type conversions and error handling for each file type.

        Parameters:
            file (str, file-like, or pd.DataFrame): The input data source.
                - str: File path to load from
                - file-like: File object to read from
                - pd.DataFrame: Existing DataFrame to use directly
            has_column_names (bool): Whether the file contains column headers.
                Used to determine pandas header parameter for text files.
            input_type (str): File extension indicating the file type.
                Supported types: '.xlsx' (Excel), '.tsv', '.txt' (tab-separated text).

        Raises:
            HedFileError:
                - If file is empty or None (FILE_NOT_FOUND)
                - If unsupported file extension provided (INVALID_EXTENSION)
                - If file loading fails due to format issues (INVALID_FILE_FORMAT)

        Notes:
            - For DataFrame input: Converts to string type and auto-detects column names
            - For Excel files: Loads workbook and converts specified worksheet to DataFrame
            - For text files: Uses pandas read_csv with tab delimiter and handles empty files
            - All loaded data is converted to string type for consistency
            - NaN values in text files are replaced with "n/a"
        """
        pandas_header = 0 if has_column_names else None

        # If file is already a DataFrame
        if isinstance(file, pd.DataFrame):
            self._dataframe = file.astype(str)
            self._has_column_names = self._dataframe_has_names(self._dataframe)
            return

        # Check for empty file or None
        if not file:
            raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file specification passed to BaseInput.", file)

        # Handle Excel file input
        if input_type in self.EXCEL_EXTENSION:
            self._load_excel_file(file, has_column_names)
            return

        # Handle unsupported file extensions
        if input_type not in self.TEXT_EXTENSION:
            raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unsupported file extension for text files.",
                               self.name)

        # Handle text file input (CSV/TSV)
        self._load_text_file(file, pandas_header)

    def _load_excel_file(self, file, has_column_names):
        """ Load an Excel file into the internal DataFrame.

        The workbook is opened with openpyxl and kept on the instance, then the worksheet
        selected by the stored worksheet name (or the first one if that is None) is
        converted to a DataFrame.

        Parameters:
            file (str or file-like): Path to the Excel file or file-like object to load.
            has_column_names (bool): Whether the first row of the worksheet contains
                column headers that should be used as DataFrame column names.

        Raises:
            HedFileError: If any step of loading or conversion raises. The original
                exception is chained for debugging purposes.
        """
        try:
            workbook = openpyxl.load_workbook(file)
            # get_worksheet reads the stored workbook, so it must be set before the lookup.
            self._loaded_workbook = workbook
            sheet = self.get_worksheet(self._worksheet_name)
            self._dataframe = self._get_dataframe_from_worksheet(sheet, has_column_names)
        except Exception as load_error:
            message = f"Failed to load Excel file: {str(load_error)}"
            raise HedFileError(HedExceptions.INVALID_FILE_FORMAT, message, self.name) from load_error

    def _load_text_file(self, file, pandas_header):
        """ Load a text file (TSV/CSV) into a pandas DataFrame.

        This method handles loading tab-separated value files and other text-based
        formats using pandas read_csv. It includes special handling for empty files,
        proper NaN value replacement, and comprehensive error handling.

        Parameters:
            file (str or file-like): Path to the text file or file-like object to load.
                Can be any format supported by pandas read_csv with tab delimiter.
            pandas_header (int or None): Row number to use as column headers.
                - 0: First row contains headers
                - None: No header row, generate default column names

        Raises:
            HedFileError: If loading fails due to file format issues, encoding problems,
                or any other pandas-related errors. The original exception is chained
                for debugging purposes.

        Notes:
            - Uses tab delimiter for parsing (appropriate for .tsv files)
            - Handles empty files by creating an empty DataFrame
            - Converts all data to string type for consistency
            - Replaces NaN values with "n/a" for consistent handling
            - Skips blank lines during parsing
            - Uses specific na_values configuration ("", "null")
            - Handles pandas.errors.EmptyDataError for files with no data
        """
        if isinstance(file, str) and os.path.exists(file) and os.path.getsize(file) == 0:
            self._dataframe = pd.DataFrame()  # Handle empty file
            return

        try:
            self._dataframe = pd.read_csv(file, delimiter='\t', header=pandas_header, skip_blank_lines=True,
                                          dtype=str, keep_default_na=True, na_values=("", "null"))
            # Replace NaN values with a known value
            self._dataframe = self._dataframe.fillna("n/a")
        except pd.errors.EmptyDataError:
            self._dataframe = pd.DataFrame()  # Handle case where file has no data
        except Exception as e:
            raise HedFileError(HedExceptions.INVALID_FILE_FORMAT, f"Failed to load text file: {str(e)}",
                               self.name) from e

columns `property` ¶

columns

Returns a list of the column names.

Empty if no column names.

Returns:

Name	Type	Description
`columns`	`list`	The column names.

dataframe `property` ¶

dataframe

The underlying dataframe.

dataframe_a `property` ¶

dataframe_a: DataFrame

Return the assembled dataframe. Probably a placeholder name.

Returns:

Type	Description
`DataFrame`	pd.Dataframe: the assembled dataframe

has_column_names `property` ¶

has_column_names: bool

True if dataframe has column names.

loaded_workbook `property` ¶

loaded_workbook

The underlying loaded workbook.

name `property` ¶

name: str

Name of the data.

needs_sorting `property` ¶

needs_sorting: bool

Return True if this both has an onset column, and it needs sorting.

onsets `property` ¶

onsets

Return the onset column if it exists.

series_a `property` ¶

series_a: Series

Return the assembled dataframe as a series.

Returns:

Type	Description
`Series`	pd.Series: the assembled dataframe with columns merged.

series_filtered `property` ¶

series_filtered: Union[Series, None]

Return the assembled dataframe as a series, with rows that have the same onset combined.

Returns:

Type	Description
`Union[Series, None]`	Union[pd.Series, None] the assembled dataframe with columns merged, and the rows filtered together.

worksheet_name `property` ¶

worksheet_name

The worksheet name.

assemble ¶

assemble(
    mapper=None, skip_curly_braces=False
) -> pd.DataFrame

Assembles the HED strings.

Parameters:

Name	Type	Description	Default
`mapper`	`ColumnMapper or None`	Generally pass none here unless you want special behavior.	`None`
`skip_curly_braces`	`bool`	If True, don't plug in curly brace values into columns.	`False`

Returns: pd.Dataframe: The assembled dataframe.

Source code in hed/models/base_input.py

def assemble(self, mapper=None, skip_curly_braces=False) ->pd.DataFrame:
    """ Assemble the HED strings for this input.

    Parameters:
        mapper (ColumnMapper or None): The mapper to use; when None, self._mapper is used.
        skip_curly_braces (bool): If True, return the transformed columns without
            plugging curly brace references into them.
    Returns:
        pd.Dataframe: The assembled dataframe.
    """
    active_mapper = self._mapper if mapper is None else mapper
    transformed = self._handle_transforms(active_mapper)
    if not skip_curly_braces:
        # Only the transformer keys are needed here; the second element of the pair is ignored.
        transformers, _ = active_mapper.get_transformers()
        column_refs = self.get_column_refs()
        return _handle_curly_braces_refs(transformed, column_refs, list(transformers))
    return transformed

column_metadata ¶

column_metadata() -> dict[int, ColumnMeta]

Return the metadata for each column.

Returns:

Type	Description
`dict[int, ColumnMeta]`	dict[int, 'ColumnMeta']: Number/ColumnMeta pairs.

Source code in hed/models/base_input.py

def column_metadata(self) -> dict[int, 'ColumnMeta']:
    """ Return the per-column metadata held by the mapper.

    Returns:
        dict[int, 'ColumnMeta']: The mapper's final column map, or an empty dict
            when there is no (truthy) mapper.
    """
    mapper = self._mapper
    return mapper._final_column_map if mapper else {}

combine_dataframe `staticmethod` ¶

combine_dataframe(dataframe) -> pd.Series

Combine all columns in the given dataframe into a single HED string series, skipping empty columns and columns with empty strings.

Parameters:

Name	Type	Description	Default
`dataframe`	`Dataframe`	The dataframe to combine	required

Returns:

Type	Description
`Series`	pd.Series: The assembled series.

Source code in hed/models/base_input.py

@staticmethod
def combine_dataframe(dataframe) ->pd.Series:
    """ Combine all columns in the given dataframe into a single HED string series,
        skipping empty columns and columns with empty strings.

    Parameters:
        dataframe (pd.Dataframe): The dataframe to combine

    Returns:
        pd.Series: The assembled series.
    """
    dataframe = dataframe.apply(
        lambda x: ', '.join(filter(lambda e: bool(e) and e != "n/a", map(str, x))),
        axis=1
    )
    return dataframe

convert_to_form ¶

convert_to_form(hed_schema, tag_form)

Convert all tags in underlying dataframe to the specified form.

Parameters:

Name	Type	Description	Default
`hed_schema`	`HedSchema`	The schema to use to convert tags.	required
`tag_form`	`str`	HedTag property to convert tags to. Most cases should use convert_to_short or convert_to_long below.	required

Source code in hed/models/base_input.py

def convert_to_form(self, hed_schema, tag_form):
    """ Convert the tags in the underlying dataframe to the requested form.

    Parameters:
        hed_schema (HedSchema): The schema to use to convert tags.
        tag_form (str): HedTag property to convert tags to.
            Most cases should use convert_to_short or convert_to_long below.

    Notes:
        - Delegates to hed.models.df_util.convert_to_form for the mapper's tag columns.
    """
    # Imported under an alias so the helper is not confused with this method's own name.
    from hed.models.df_util import convert_to_form as _convert_frame
    tag_columns = self._mapper.get_tag_columns()
    _convert_frame(self._dataframe, hed_schema, tag_form, tag_columns)

convert_to_long ¶

convert_to_long(hed_schema)

Convert all tags in underlying dataframe to long form.

Parameters:

Name	Type	Description	Default
`hed_schema`	`HedSchema or None`	The schema to use to convert tags.	required

Source code in hed/models/base_input.py

def convert_to_long(self, hed_schema):
    """ Convert the tags in the underlying dataframe to long form.

    Parameters:
        hed_schema (HedSchema or None): The schema to use to convert tags.
    """
    target_form = "long_tag"
    self.convert_to_form(hed_schema, target_form)

convert_to_short ¶

convert_to_short(hed_schema)

Convert all tags in underlying dataframe to short form.

Parameters:

Name	Type	Description	Default
`hed_schema`	`HedSchema`	The schema to use to convert tags.	required

Source code in hed/models/base_input.py

def convert_to_short(self, hed_schema):
    """ Convert the tags in the underlying dataframe to short form.

    Parameters:
        hed_schema (HedSchema): The schema to use to convert tags.
    """
    target_form = "short_tag"
    self.convert_to_form(hed_schema, target_form)

expand_defs ¶

expand_defs(hed_schema, def_dict)

Expands any def tags found in the underlying dataframe.

Parameters:

Name	Type	Description	Default
`hed_schema`	`HedSchema or None`	The schema to use to identify defs.	required
`def_dict`	`DefinitionDict`	The definitions to expand.	required

Source code in hed/models/base_input.py

def expand_defs(self, hed_schema, def_dict):
    """ Expand any def tags found in the underlying dataframe.

    Parameters:
        hed_schema (HedSchema or None): The schema to use to identify defs.
        def_dict (DefinitionDict): The definitions to expand.

    Notes:
        - Delegates to hed.models.df_util.expand_defs, restricted to the columns
          returned by the mapper's get_tag_columns().
    """
    # Use the absolute package path, as convert_to_form does. A bare 'df_util' import only
    # resolves when the hed/models directory itself happens to be on sys.path.
    from hed.models.df_util import expand_defs
    expand_defs(self._dataframe, hed_schema=hed_schema, def_dict=def_dict, columns=self._mapper.get_tag_columns())

get_column_refs ¶

get_column_refs() -> list

Return a list of column refs for this file.

Default implementation returns empty list.

Returns:

Name	Type	Description
`list`	`list`	A list of unique column refs found.

Source code in hed/models/base_input.py

def get_column_refs(self) -> list:
    """ Return the column refs found for this file.

        This implementation has none to report and always returns a new empty list.

    Returns:
       list: A list of unique column refs found.
    """
    return list()

get_def_dict ¶

get_def_dict(
    hed_schema, extra_def_dicts=None
) -> DefinitionDict

Return the definition dict for this file.

Note: Baseclass implementation returns just extra_def_dicts.

Parameters:

Name	Type	Description	Default
`hed_schema`	`HedSchema`	Identifies tags to find definitions(if needed).	required
`extra_def_dicts`	`list, DefinitionDict, or None`	Extra dicts to add to the list.	`None`

Returns:

Name	Type	Description
`DefinitionDict`	`DefinitionDict`	A single definition dict representing all the data(and extra def dicts).

Source code in hed/models/base_input.py

def get_def_dict(self, hed_schema, extra_def_dicts=None) -> 'DefinitionDict':
    """ Build the definition dict for this file.

    Note: This implementation builds the result from extra_def_dicts only.

    Parameters:
        hed_schema (HedSchema): Passed to the DefinitionDict constructor along with extra_def_dicts.
        extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list.

    Returns:
        DefinitionDict: A single definition dict constructed from extra_def_dicts and hed_schema.
    """
    # Function-scope import, as in the original listing.
    from hed.models.definition_dict import DefinitionDict as _DefinitionDict
    combined_definitions = _DefinitionDict(extra_def_dicts, hed_schema)
    return combined_definitions

get_worksheet ¶

get_worksheet(
    worksheet_name=None,
) -> Union[openpyxl.workbook.Workbook, None]

Get the requested worksheet.

Parameters:

Name	Type	Description	Default
`worksheet_name`	`str or None`	The name of the requested worksheet by name or the first one if None.	`None`

Returns:

Type	Description
`Union[Workbook, None]`	Union[openpyxl.workbook.Workbook, None]: The requested worksheet, or None if no workbook is loaded.

Notes

If None, returns the first worksheet.

:raises KeyError: - The specified worksheet name does not exist.

Source code in hed/models/base_input.py

def get_worksheet(self, worksheet_name=None) -> Union[openpyxl.workbook.Workbook, None]:
    """ Get the requested worksheet from the loaded workbook.

    Parameters:
        worksheet_name (str or None): The name of the requested worksheet, or None for the first one.

    Returns:
        Union[openpyxl.workbook.Workbook, None]: The item looked up in the loaded workbook
            (by name, or the first entry of its worksheets), or None when no workbook is loaded.

    Notes:
        If worksheet_name is None (or otherwise falsy), returns the first worksheet.
        NOTE(review): the annotation names Workbook although the value comes from indexing the
        workbook, so it is presumably a worksheet object - confirm and adjust the annotation.

    :raises KeyError:
        - The specified worksheet name does not exist.
    """
    workbook = self._loaded_workbook
    if not workbook:
        return None
    if worksheet_name:
        return workbook[worksheet_name]
    return workbook.worksheets[0]

reset_mapper ¶

reset_mapper(new_mapper)

Set mapper to a different view of the file.

Parameters:

Name	Type	Description	Default
`new_mapper`	`ColumnMapper`	A column mapper to be associated with this base input.	required

Source code in hed/models/base_input.py

def reset_mapper(self, new_mapper):
    """ Replace the mapper with a different view of the file.

    Parameters:
        new_mapper (ColumnMapper): A column mapper to be associated with this base input.
            A falsy value is replaced by a default-constructed ColumnMapper.

    Notes:
        - When a dataframe is loaded and it has column names, the mapper is given
          the dataframe's columns through set_column_map.
    """
    self._mapper = new_mapper
    if not new_mapper:
        self._mapper = ColumnMapper()

    frame = self._dataframe
    if frame is None or not self._has_column_names:
        return
    self._mapper.set_column_map(frame.columns)

set_cell ¶

set_cell(
    row_number,
    column_number,
    new_string_obj,
    tag_form="short_tag",
)

Replace the specified cell with transformed text.

Parameters:

Name	Type	Description	Default
`row_number`	`int`	The row number of the spreadsheet to set.	required
`column_number`	`int`	The column number of the spreadsheet to set.	required
`new_string_obj`	`HedString`	Object with text to put in the given cell.	required
`tag_form`	`str`	Version of the tags (short_tag, long_tag, base_tag, etc.)	`'short_tag'`

Notes

Any attribute of a HedTag that returns a string is a valid value of tag_form.

:raises ValueError: - There is not a loaded dataframe.

:raises KeyError: - The indicated row/column does not exist.

:raises AttributeError: - The indicated tag_form is not an attribute of HedTag.

Source code in hed/models/base_input.py

def set_cell(self, row_number, column_number, new_string_obj, tag_form="short_tag"):
    """ Overwrite one cell of the dataframe with the text form of a HED string object.

    Parameters:
        row_number (int):    The row position of the cell to set.
        column_number (int): The column position of the cell to set.
        new_string_obj (HedString): Object whose get_as_form(tag_form) result is written to the cell.
        tag_form (str): Version of the tags (short_tag, long_tag, base_tag, etc.)

    Notes:
         The cell is addressed positionally (dataframe.iloc).
         Any attribute of a HedTag that returns a string is a valid value of tag_form.

    :raises ValueError:
        - There is not a loaded dataframe.

    :raises KeyError:
        - The indicated row/column does not exist.
          NOTE(review): positional indexing presumably raises IndexError instead - confirm.

    :raises AttributeError:
        - The indicated tag_form is not an attribute of HedTag.
    """
    frame = self._dataframe
    if frame is None:
        raise ValueError("No data frame loaded")

    # The right-hand side is evaluated first, so a bad tag_form fails before the frame is touched.
    frame.iloc[row_number, column_number] = new_string_obj.get_as_form(tag_form)

shrink_defs ¶

shrink_defs(hed_schema)

Shrinks any def-expand found in the underlying dataframe.

Parameters:

Name	Type	Description	Default
`hed_schema`	`HedSchema or None`	The schema to use to identify defs.	required

Source code in hed/models/base_input.py

def shrink_defs(self, hed_schema):
    """ Shrink any def-expand found in the underlying dataframe.

    Parameters:
        hed_schema (HedSchema or None): The schema to use to identify defs.

    Notes:
        - Delegates to hed.models.df_util.shrink_defs, restricted to the columns
          returned by the mapper's get_tag_columns().
    """
    # Use the absolute package path, as convert_to_form does. A bare 'df_util' import only
    # resolves when the hed/models directory itself happens to be on sys.path.
    from hed.models.df_util import shrink_defs
    shrink_defs(self._dataframe, hed_schema=hed_schema, columns=self._mapper.get_tag_columns())

to_csv ¶

to_csv(file=None)

Write to file or return as a string.

Parameters:

Name	Type	Description	Default
`file`	`str, file-like, or None`	Location to save this file. If None, return as string.	`None`

Returns: None or str: None if file is given or the contents as a str if file is None.

:raises OSError: - Cannot open the indicated file.

Source code in hed/models/base_input.py

def to_csv(self, file=None):
    """ Write the dataframe as tab-separated text, or return it as a string.

    Parameters:
        file (str, file-like, or None): Location to save this file. If None, return as string.
    Returns:
        None or str:  None if file is given or the contents as a str if file is None.

    Notes:
        - The index is not written; the header row is written only when the input has column names.

    :raises OSError:
        - Cannot open the indicated file.
    """
    return self._dataframe.to_csv(file, sep='\t', index=False, header=self._has_column_names)

to_excel ¶

to_excel(file)

Output to an Excel file.

Parameters:

Name	Type	Description	Default
`file`	`str or file-like`	Location to save this base input.	required

:raises ValueError: - If empty file object was passed.

:raises OSError: - Cannot open the indicated file.

Source code in hed/models/base_input.py

def to_excel(self, file):
    """ Output to an Excel file.

    Parameters:
        file (str or file-like): Location to save this base input.

    Notes:
        - When a workbook was loaded, the dataframe values are written back into the worksheet
          selected by self._worksheet_name and the workbook is saved to file.
        - Otherwise the dataframe is written directly with DataFrame.to_excel.
        - Rows are addressed by their position in the dataframe, not by index label, so a
          dataframe whose index is not 0..n-1 is still written to the correct rows.

    :raises ValueError:
        - If empty file object was passed.

    :raises OSError:
        - Cannot open the indicated file.
    """
    if not file:
        raise ValueError("Empty file name or object passed in to BaseInput.save.")

    dataframe = self._dataframe
    if not self._loaded_workbook:
        dataframe.to_excel(file, header=self._has_column_names)
        return

    worksheet = self.get_worksheet(self._worksheet_name)
    # Worksheet rows and columns are 1-based; a header row pushes the data down one more row.
    first_data_row = 2 if self._has_column_names else 1
    first_data_column = 1
    for row_position, row_values in enumerate(dataframe.itertuples(index=False, name=None)):
        for column_position, cell_value in enumerate(row_values):
            worksheet.cell(row_position + first_data_row,
                           column_position + first_data_column).value = cell_value

    self._loaded_workbook.save(file)

validate ¶

validate(
    hed_schema,
    extra_def_dicts=None,
    name=None,
    error_handler=None,
) -> list[dict]

Creates a SpreadsheetValidator and returns all issues with this file.

Parameters:

Name	Type	Description	Default
`hed_schema`	`HedSchema`	The schema to use for validation.	required
`extra_def_dicts`	`list of DefDict or DefDict`	All definitions to use for validation.	`None`
`name`	`str`	The name to report errors from this file as.	`None`
`error_handler`	`ErrorHandler`	Error context to use. Creates a new one if None.	`None`

Returns:

Type	Description
`list[dict]`	list[dict]: A list of issues for a HED string.

Source code in hed/models/base_input.py

def validate(self, hed_schema, extra_def_dicts=None, name=None, error_handler=None) -> list[dict]:
    """Run a SpreadsheetValidator over this file and return what it reports.

    Parameters:
        hed_schema (HedSchema): The schema to use for validation.
        extra_def_dicts (list of DefDict or DefDict): All definitions to use for validation.
        name (str): The name to report errors from this file as; self.name is used when falsy.
        error_handler (ErrorHandler): Error context to use.  Creates a new one if None.

    Returns:
        list[dict]: The issues returned by the validator.
    """
    from hed.validator.spreadsheet_validator import SpreadsheetValidator
    report_name = name if name else self.name
    validator = SpreadsheetValidator(hed_schema)
    # The definitions come from the mapper, combined with any extra dicts supplied.
    definitions = self._mapper.get_def_dict(hed_schema, extra_def_dicts)
    return validator.validate(self, definitions, report_name, error_handler=error_handler)

ColumnMapper¶

ColumnMapper ¶

Mapping of a base input file columns into HED tags.

Notes

All column numbers are 0 based.

Source code in hed/models/column_mapper.py

class ColumnMapper:
    """ Mapping of a base input file columns into HED tags.

    Notes:
        - All column numbers are 0 based.
    """

    def __init__(self, sidecar=None, tag_columns=None, column_prefix_dictionary=None,
                 optional_tag_columns=None, warn_on_missing_column=False):
        """ Constructor for ColumnMapper.

        Parameters:
            sidecar (Sidecar): A sidecar to gather column data from.
            tag_columns: (list):  A list of ints or strings containing the columns that contain the HED tags.
                Sidecar column definitions will take precedent if there is a conflict with tag_columns.
            column_prefix_dictionary (dict): Dictionary with keys that are column numbers/names and values are HED tag
                prefixes to prepend to the tags in that column before processing.
            optional_tag_columns (list): A list of ints or strings containing the columns that contain
                the HED tags. If the column is otherwise unspecified, convert this column type to HEDTags.
            warn_on_missing_column (bool): If True, issue mapping warnings on column names that are missing from
                                            the sidecar.

        Notes:
            - All column numbers are 0 based.
            - The column_prefix_dictionary may be deprecated/renamed in the future.
                - These are no longer prefixes, but rather converted to value columns:
                  {"key": "Description", 1: "Label/"} will turn into value columns as
                  {"key": "Description/#", 1: "Label/#"}
                  It will be a validation issue if column 1 is called "key" in the above example.
                  This means it no longer accepts anything but the value portion only in the columns.

        """

        # Maps column number to column_entry.  This is what's actually used by most code.
        self._final_column_map = {}
        self._no_mapping_info = True
        self._column_map = {}
        self._reverse_column_map = {}
        self._warn_on_missing_column = warn_on_missing_column
        if tag_columns is None:
            tag_columns = []
        self._tag_columns = tag_columns
        if optional_tag_columns is None:
            optional_tag_columns = []
        self._optional_tag_columns = optional_tag_columns
        if column_prefix_dictionary is None:
            column_prefix_dictionary = {}
        self._column_prefix_dictionary = column_prefix_dictionary

        self._na_patterns = ["n/a", "nan"]
        self._sidecar = None
        self._set_sidecar(sidecar)

        # finalize the column map based on initial settings with no header
        self._finalize_mapping()

    @property
    def tag_columns(self):
        """ Return the known tag and optional tag columns with numbers as names when possible.

            Returns:
                tag_columns(list of str or int): A list of all tag and optional tag columns as labels.
        """
        joined_list = self._tag_columns + self._optional_tag_columns
        return list(set(self._convert_to_names(self._column_map, joined_list)))

    @property
    def column_prefix_dictionary(self):
        """ Return the column_prefix_dictionary with numbers turned into names where possible.

            Returns:
                column_prefix_dictionary(list of str or int): A column_prefix_dictionary with column labels as keys.
        """
        return self._convert_to_names_dict(self._column_map, self._column_prefix_dictionary)

    def get_transformers(self):
        """ Return the transformers to use on a dataframe.

            Returns:
                tuple(dict, list):
                    dict({str or int: func}): The functions to use to transform each column.
                    need_categorical(list of int): A list of columns to treat as categorical.
        """
        final_transformers = {}
        need_categorical = []
        for column in self._final_column_map.values():
            assign_to_column = column.column_name
            if isinstance(assign_to_column, int):
                if self._column_map:
                    assign_to_column = self._column_map[assign_to_column]
                else:
                    assign_to_column = assign_to_column
            if column.column_type == ColumnType.Ignore:
                continue
            elif column.column_type == ColumnType.Value:
                value_str = column.hed_dict
                from functools import partial
                final_transformers[assign_to_column] = partial(self._value_handler, value_str)
            elif column.column_type == ColumnType.Categorical:
                need_categorical.append(column.column_name)
                category_values = column.hed_dict
                from functools import partial
                final_transformers[assign_to_column] = partial(self._category_handler, category_values)
            else:
                final_transformers[assign_to_column] = lambda x: x

        return final_transformers, need_categorical

    @staticmethod
    def check_for_blank_names(column_map, allow_blank_names) -> list[dict]:
        """ Validate there are no blank column names.

        Parameters:
            column_map (iterable): A list of column names.
            allow_blank_names (bool): Only find issues if True.

        Returns:
            list[dict]: A list of dicts, one per issue.
        """
        # We don't have any checks right now if blank/duplicate is allowed
        if allow_blank_names:
            return []

        issues = []

        for column_number, name in enumerate(column_map):
            if name is None or not name or name.startswith(PANDAS_COLUMN_PREFIX_TO_IGNORE):
                issues += ErrorHandler.format_error(ValidationErrors.HED_BLANK_COLUMN, column_number)
                continue

        return issues

    def _set_sidecar(self, sidecar):
        """ Set the sidecar this column mapper uses.

        Parameters:
            sidecar (Sidecar or None): The sidecar to use.

        :raises ValueError:
            - A sidecar was previously set.
        """
        if self._sidecar:
            raise ValueError("Trying to set a second sidecar on a column mapper.")
        if not sidecar:
            return None

        self._sidecar = sidecar

    @property
    def sidecar_column_data(self):
        """ Pass through to get the sidecar ColumnMetadata.

        Returns:
            dict({str:ColumnMetadata}): The column metadata defined by this sidecar.
        """
        if self._sidecar:
            return self._sidecar.column_data

        return {}

    def get_tag_columns(self):
        """ Return the column numbers or names that are mapped to be HedTags.

            Note: This is NOT the tag_columns or optional_tag_columns parameter, though they set it.

        Returns:
            column_identifiers(list): A list of column numbers or names that are ColumnType.HedTags.
                0-based if integer-based, otherwise column name.
        """
        return [column_entry.column_name for number, column_entry in self._final_column_map.items()
                if column_entry.column_type == ColumnType.HEDTags]

    def set_tag_columns(self, tag_columns=None, optional_tag_columns=None, finalize_mapping=True):
        """ Set tag columns and optional tag columns.

        Parameters:
            tag_columns (list): A list of ints or strings containing the columns that contain the HED tags.
                                If None, clears existing tag_columns
            optional_tag_columns (list): A list of ints or strings containing the columns that contain the HED tags,
                                         but not an error if missing.
                                         If None, clears existing tag_columns
            finalize_mapping (bool): Re-generate the internal mapping if True, otherwise no effect until finalize.
        """
        if tag_columns is None:
            tag_columns = []
        if optional_tag_columns is None:
            optional_tag_columns = []
        self._tag_columns = tag_columns
        self._optional_tag_columns = optional_tag_columns
        if finalize_mapping:
            self._finalize_mapping()

    def set_column_map(self, new_column_map=None) -> list[dict]:
        """ Set the column number to name mapping.

        Parameters:
            new_column_map (list or dict):  Either an ordered list of the column names or column_number:column name.
                dictionary. In both cases, column numbers start at 0.

        Returns:
            list[dict]: List of issues. Each issue is a dictionary.

        """
        if new_column_map is None:
            new_column_map = {}
        if isinstance(new_column_map, dict):
            column_map = new_column_map
        # List like
        else:
            column_map = {column_number: column_name for column_number, column_name in enumerate(new_column_map)}
        self._column_map = column_map
        self._reverse_column_map = {column_name: column_number for column_number, column_name in column_map.items()}
        self._finalize_mapping()

    def set_column_prefix_dictionary(self, column_prefix_dictionary, finalize_mapping=True):
        """Set the column prefix dictionary. """
        self._column_prefix_dictionary = column_prefix_dictionary
        if finalize_mapping:
            self._finalize_mapping()

    @staticmethod
    def _get_sidecar_basic_map(column_map, column_data):
        basic_final_map = {}
        unhandled_cols = []
        if column_map:
            for column_number, column_name in column_map.items():
                if column_name is None:
                    continue
                if column_name in column_data:
                    column_entry = copy.deepcopy(column_data[column_name])
                    column_entry.column_name = column_name
                    basic_final_map[column_name] = column_entry
                    continue
                elif isinstance(column_name, str) and column_name.startswith(PANDAS_COLUMN_PREFIX_TO_IGNORE):
                    continue
                unhandled_cols.append(column_name)

        return basic_final_map, unhandled_cols

    @staticmethod
    def _convert_to_names(column_to_name_map, column_list):
        converted_names = []
        for index in column_list:
            if isinstance(index, int):
                if not column_to_name_map:
                    converted_names.append(index)
                elif index in column_to_name_map:
                    converted_names.append(column_to_name_map[index])
            else:
                if index in column_to_name_map.values():
                    converted_names.append(index)
        return converted_names

    @staticmethod
    def _convert_to_names_dict(column_to_name_map, column_dict):
        converted_dict = {}
        for index, column_data in column_dict.items():
            if isinstance(index, int):
                if not column_to_name_map:
                    converted_dict[index] = column_data
                elif index in column_to_name_map:
                    converted_dict[column_to_name_map[index]] = column_data
            else:
                if index in column_to_name_map.values():
                    converted_dict[index] = column_data
        return converted_dict

    @staticmethod
    def _add_value_columns(final_map, column_prefix_dictionary):
        for col, prefix in column_prefix_dictionary.items():
            if prefix.endswith("/"):
                prefix = prefix + "#"
            else:
                prefix = prefix + "/#"
            new_def = ColumnMetadata(ColumnType.Value, col, source=prefix)
            final_map[col] = new_def

    @staticmethod
    def _add_tag_columns(final_map, tag_columns):
        for col in tag_columns:
            new_def = ColumnMetadata(ColumnType.HEDTags, col)
            final_map[col] = new_def

    def _get_column_lists(self):
        column_lists = self._tag_columns, self._optional_tag_columns, self._column_prefix_dictionary
        list_names = ["tag_columns", "optional_tag_columns", "column_prefix_dictionary"]

        if not any(column for column in column_lists):
            return column_lists, list_names
        # Filter out empty lists from the above
        column_lists, list_names = zip(*[(col_list, list_name) for col_list, list_name in zip(column_lists, list_names)
                                         if col_list])

        return column_lists, list_names

    def _check_for_duplicates_and_required(self, list_names, column_lists) -> list[dict]:
        """ Check for duplicates and required columns in the given lists.
        """
        issues = []
        for list_name, col_list in zip(list_names, column_lists):
            # Convert all known strings to ints, then check for duplicates
            converted_list = [item if isinstance(item, int) else self._reverse_column_map.get(item, item)
                              for item in col_list]

            if col_list != self._optional_tag_columns:
                for test_col in converted_list:
                    if isinstance(test_col, str) and test_col not in self._reverse_column_map:
                        issues += ErrorHandler.format_error(ValidationErrors.HED_MISSING_REQUIRED_COLUMN,
                                                            test_col, list_name)

            issues += self._check_for_duplicates_between_lists(converted_list, list_name,
                                                               ValidationErrors.DUPLICATE_COLUMN_IN_LIST)

        return issues

    def _check_for_duplicates_between_lists(self, checking_list, list_names, error_type):
        issues = []
        duplicates = [item for item, count in Counter(checking_list).items() if count > 1]
        for duplicate in duplicates:
            issues += ErrorHandler.format_error(error_type, duplicate,
                                                self._column_map.get(duplicate), list_names)
        return issues

    def check_for_mapping_issues(self, allow_blank_names=False) ->list[dict]:
        """ Collect every problem with the current column_map, tag_columns, and prefix columns.

        Parameters:
            allow_blank_names (bool): Only flag blank names if False.

        Returns:
            list[dict]: All issues found as a list of dicts.
        """
        # Per-list checks: required columns are present and no list repeats a column.
        populated_lists, populated_names = self._get_column_lists()
        issues = self._check_for_duplicates_and_required(populated_names, populated_lists)

        # Cross-list check: a column may not be both a tag column and a prefixed column.
        referenced_columns = self.tag_columns + list(self.column_prefix_dictionary)
        issues += self._check_for_duplicates_between_lists(referenced_columns, populated_names,
                                                           ValidationErrors.DUPLICATE_COLUMN_BETWEEN_SOURCES)

        # With a sidecar, the only explicitly listed column tolerated is the single "HED" column.
        has_other_columns = bool(referenced_columns) and referenced_columns != ["HED"]
        if self._sidecar and has_other_columns:
            issues += ErrorHandler.format_error(ValidationErrors.SIDECAR_AND_OTHER_COLUMNS,
                                                column_names=referenced_columns)

        # Optionally flag columns that nothing accounts for.
        if self._warn_on_missing_column:
            accounted_for = list(self.sidecar_column_data) + referenced_columns + NO_WARN_COLUMNS
            for column_name in self._column_map.values():
                if column_name in accounted_for:
                    continue
                issues += ErrorHandler.format_error(ValidationErrors.HED_UNKNOWN_COLUMN, column_name)

        issues += self.check_for_blank_names(self._column_map.values(), allow_blank_names=allow_blank_names)
        return issues

    def _finalize_mapping(self):
        """ Rebuild the final column map from the sidecar, the tag columns, and the prefixed columns.

        Notes:
            - The result is stored in self._final_column_map, ordered by sorted key.
            - Tag columns are added after the sidecar entries and prefixed (value) columns last,
              so later sources overwrite earlier ones for the same key.
        """
        # The second element (columns the sidecar did not handle) is not consumed here. The previous
        # code called _remove_from_list on it and discarded the result; since that helper returns a
        # new list instead of mutating, those calls had no effect and have been dropped.
        final_map, _unhandled_cols = self._get_sidecar_basic_map(self._column_map, self.sidecar_column_data)

        self._add_tag_columns(final_map, self.tag_columns)
        self._add_value_columns(final_map, self.column_prefix_dictionary)

        self._final_column_map = dict(sorted(final_map.items()))

    @staticmethod
    def _remove_from_list(list_to_alter, to_remove) -> list:
        return [item for item in list_to_alter if item not in to_remove]

    def get_def_dict(self, hed_schema, extra_def_dicts=None) -> DefinitionDict:
        """ Return the definitions from the sidecar (if any) combined with the extra def dicts.

        Parameters:
            hed_schema (Schema): A HED schema object to use for extracting definitions.
            extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list.

        Returns:
            DefinitionDict: A single definition dict representing all the data (and extra def dicts).
        """
        sidecar = self._sidecar
        if not sidecar:
            # Without a sidecar only the extra def dicts contribute definitions.
            return DefinitionDict(extra_def_dicts, hed_schema=hed_schema)

        return sidecar.get_def_dict(hed_schema=hed_schema, extra_def_dicts=extra_def_dicts)

    def get_column_mapping_issues(self) -> list[dict]:
        """ Get all the issues with finalizing column mapping(duplicate columns, missing required, etc.).

        Notes:
            - This is deprecated and now a wrapper for "check_for_mapping_issues()".

        Returns:
            list[dict]: A list dictionaries of all issues found from mapping column names to numbers.

        """
        return self.check_for_mapping_issues()

    @staticmethod
    def _category_handler(category_values, x):
        return category_values.get(x, "")

    @staticmethod
    def _value_handler(value_str, x):
        if x == "n/a":
            return "n/a"

        return value_str.replace("#", str(x))

column_prefix_dictionary `property` ¶

column_prefix_dictionary

Return the column_prefix_dictionary with numbers turned into names where possible.

Returns:

Name	Type	Description
`column_prefix_dictionary`	`dict`	The column_prefix_dictionary with column labels (names where possible, otherwise numbers) as keys.

sidecar_column_data `property` ¶

sidecar_column_data

Pass through to get the sidecar ColumnMetadata.

Returns:

Name	Type	Description
`dict`	`{str: ColumnMetadata}`	The column metadata defined by this sidecar.

tag_columns `property` ¶

tag_columns

Return the known tag and optional tag columns with numbers as names when possible.

Returns:

Name	Type	Description
`tag_columns`	`list of str or int`	A list of all tag and optional tag columns as labels.

check_for_blank_names `staticmethod` ¶

check_for_blank_names(
    column_map, allow_blank_names
) -> list[dict]

Validate there are no blank column names.

Parameters:

Name	Type	Description	Default
`column_map`	`iterable`	A list of column names.	required
`allow_blank_names`	`bool`	Only find issues if False; if True, no checks are performed and an empty list is returned.	required

Returns:

Type	Description
`list[dict]`	list[dict]: A list of dicts, one per issue.

Source code in hed/models/column_mapper.py

@staticmethod
def check_for_blank_names(column_map, allow_blank_names) -> list[dict]:
    """ Validate there are no blank column names.

    Parameters:
        column_map (iterable): A list of column names.
        allow_blank_names (bool): Only find issues if True.

    Returns:
        list[dict]: A list of dicts, one per issue.
    """
    # We don't have any checks right now if blank/duplicate is allowed
    if allow_blank_names:
        return []

    issues = []

    for column_number, name in enumerate(column_map):
        if name is None or not name or name.startswith(PANDAS_COLUMN_PREFIX_TO_IGNORE):
            issues += ErrorHandler.format_error(ValidationErrors.HED_BLANK_COLUMN, column_number)
            continue

    return issues

check_for_mapping_issues ¶

check_for_mapping_issues(
    allow_blank_names=False,
) -> list[dict]

Find all issues given the current column_map, tag_columns, etc.

Parameters:

Name	Type	Description	Default
`allow_blank_names`	`bool`	Only flag blank names if False.	`False`

Returns:

Type	Description
`list[dict]`	list[dict]: All issues found as a list of dicts.

Source code in hed/models/column_mapper.py

def check_for_mapping_issues(self, allow_blank_names=False) ->list[dict]:
    """ Collect every problem with the current column_map, tag_columns, and prefix columns.

    Parameters:
        allow_blank_names (bool): Only flag blank names if False.

    Returns:
        list[dict]: All issues found as a list of dicts.
    """
    # Per-list checks: required columns are present and no list repeats a column.
    populated_lists, populated_names = self._get_column_lists()
    issues = self._check_for_duplicates_and_required(populated_names, populated_lists)

    # Cross-list check: a column may not be both a tag column and a prefixed column.
    referenced_columns = self.tag_columns + list(self.column_prefix_dictionary)
    issues += self._check_for_duplicates_between_lists(referenced_columns, populated_names,
                                                       ValidationErrors.DUPLICATE_COLUMN_BETWEEN_SOURCES)

    # With a sidecar, the only explicitly listed column tolerated is the single "HED" column.
    has_other_columns = bool(referenced_columns) and referenced_columns != ["HED"]
    if self._sidecar and has_other_columns:
        issues += ErrorHandler.format_error(ValidationErrors.SIDECAR_AND_OTHER_COLUMNS,
                                            column_names=referenced_columns)

    # Optionally flag columns that nothing accounts for.
    if self._warn_on_missing_column:
        accounted_for = list(self.sidecar_column_data) + referenced_columns + NO_WARN_COLUMNS
        for column_name in self._column_map.values():
            if column_name in accounted_for:
                continue
            issues += ErrorHandler.format_error(ValidationErrors.HED_UNKNOWN_COLUMN, column_name)

    issues += self.check_for_blank_names(self._column_map.values(), allow_blank_names=allow_blank_names)
    return issues

get_column_mapping_issues ¶

get_column_mapping_issues() -> list[dict]

Get all the issues with finalizing column mapping(duplicate columns, missing required, etc.).

Notes

This is deprecated and now a wrapper for "check_for_mapping_issues()".

Returns:

Type	Description
`list[dict]`	A list of dictionaries of all issues found from mapping column names to numbers.

Source code in hed/models/column_mapper.py

def get_column_mapping_issues(self) -> list[dict]:
    """ Get all the issues with finalizing column mapping (duplicate columns, missing required, etc.).

    Notes:
        - This is deprecated and now a wrapper for "check_for_mapping_issues()".

    Returns:
        list[dict]: A list of dictionaries of all issues found from mapping column names to numbers.

    """
    mapping_issues = self.check_for_mapping_issues()
    return mapping_issues

get_def_dict ¶

get_def_dict(
    hed_schema, extra_def_dicts=None
) -> DefinitionDict

Return def dicts from every column description.

Parameters:

Name	Type	Description	Default
`hed_schema`	`Schema`	A HED schema object to use for extracting definitions.	required
`extra_def_dicts`	`list, DefinitionDict, or None`	Extra dicts to add to the list.	`None`

Returns:

Name	Type	Description
`DefinitionDict`	`DefinitionDict`	A single definition dict representing all the data(and extra def dicts).

Source code in hed/models/column_mapper.py

def get_def_dict(self, hed_schema, extra_def_dicts=None) -> DefinitionDict:
    """ Return the definitions from the sidecar (if any) combined with the extra def dicts.

    Parameters:
        hed_schema (Schema): A HED schema object to use for extracting definitions.
        extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list.

    Returns:
        DefinitionDict: A single definition dict representing all the data (and extra def dicts).
    """
    sidecar = self._sidecar
    if not sidecar:
        # Without a sidecar only the extra def dicts contribute definitions.
        return DefinitionDict(extra_def_dicts, hed_schema=hed_schema)

    return sidecar.get_def_dict(hed_schema=hed_schema, extra_def_dicts=extra_def_dicts)

get_tag_columns ¶

get_tag_columns()

Return the column numbers or names that are mapped to be HedTags.

Note: This is NOT the tag_columns or optional_tag_columns parameter, though they set it.

Returns:

Name	Type	Description
`column_identifiers`	`list`	A list of column numbers or names that are ColumnType.HedTags. 0-based if integer-based, otherwise column name.

Source code in hed/models/column_mapper.py

def get_tag_columns(self):
    """ Return the column numbers or names that are mapped to be HedTags.

        Note: This is NOT the tag_columns or optional_tag_columns parameter, though they set it.

    Returns:
        column_identifiers(list): A list of column numbers or names that are ColumnType.HedTags.
            0-based if integer-based, otherwise column name.
    """
    tag_column_names = []
    for column_entry in self._final_column_map.values():
        if column_entry.column_type == ColumnType.HEDTags:
            tag_column_names.append(column_entry.column_name)
    return tag_column_names

get_transformers ¶

get_transformers()

Return the transformers to use on a dataframe.

Returns:

Name	Type	Description
`tuple`	`(dict, list)`	dict({str or int: func}): The functions to use to transform each column. need_categorical(list of int): A list of columns to treat as categorical.

Source code in hed/models/column_mapper.py

def get_transformers(self):
    """ Return the transformers to use on a dataframe.

    Returns:
        tuple(dict, list):
            dict({str or int: func}): The functions to use to transform each column.
            need_categorical(list of int): A list of columns to treat as categorical.

    Notes:
        - Transformers are keyed by column name when a column map is set, otherwise by the
          identifier stored on the column entry.
        - Ignored columns get no transformer; any column that is not ignored, value, or
          categorical gets an identity transformer.
    """
    # Imported once here instead of inside each loop branch.
    from functools import partial

    final_transformers = {}
    need_categorical = []
    for column in self._final_column_map.values():
        # Skip ignored columns before resolving a name, so no lookup is done for them.
        if column.column_type == ColumnType.Ignore:
            continue

        assign_to_column = column.column_name
        if isinstance(assign_to_column, int) and self._column_map:
            assign_to_column = self._column_map[assign_to_column]

        if column.column_type == ColumnType.Value:
            value_str = column.hed_dict
            final_transformers[assign_to_column] = partial(self._value_handler, value_str)
        elif column.column_type == ColumnType.Categorical:
            # The categorical list records the entry's own identifier, not the resolved name.
            need_categorical.append(column.column_name)
            category_values = column.hed_dict
            final_transformers[assign_to_column] = partial(self._category_handler, category_values)
        else:
            final_transformers[assign_to_column] = lambda x: x

    return final_transformers, need_categorical

set_column_map ¶

set_column_map(new_column_map=None) -> list[dict]

Set the column number to name mapping.

Parameters:

Name	Type	Description	Default
`new_column_map`	`list or dict`	Either an ordered list of the column names or a dictionary mapping column number to column name. In both cases, column numbers start at 0.	`None`

Returns:

Type	Description
`list[dict]`	list[dict]: List of issues. Each issue is a dictionary.

Source code in hed/models/column_mapper.py

def set_column_map(self, new_column_map=None) -> None:
    """ Set the column number to name mapping.

    Parameters:
        new_column_map (list or dict): Either an ordered list of the column names or a dictionary
            mapping column number to column name. In both cases, column numbers start at 0.
            None is treated as an empty mapping. A dict is stored as-is (not copied).

    Returns:
        None: This method has no return statement. Use check_for_mapping_issues() to obtain
            the issues for the new mapping.

    """
    if new_column_map is None:
        new_column_map = {}
    if isinstance(new_column_map, dict):
        column_map = new_column_map
    # List like
    else:
        column_map = dict(enumerate(new_column_map))
    self._column_map = column_map
    # If two columns share a name, the later column number wins in the reverse map.
    self._reverse_column_map = {column_name: column_number for column_number, column_name in column_map.items()}
    self._finalize_mapping()

set_column_prefix_dictionary ¶

set_column_prefix_dictionary(
    column_prefix_dictionary, finalize_mapping=True
)

Set the column prefix dictionary.

Source code in hed/models/column_mapper.py

def set_column_prefix_dictionary(self, column_prefix_dictionary, finalize_mapping=True):
    """ Set the column prefix dictionary.

    Parameters:
        column_prefix_dictionary (dict): The new prefix dictionary; stored as-is.
        finalize_mapping (bool): Re-generate the internal mapping if True, otherwise no effect until finalize.
    """
    self._column_prefix_dictionary = column_prefix_dictionary
    if not finalize_mapping:
        return
    self._finalize_mapping()

set_tag_columns ¶

set_tag_columns(
    tag_columns=None,
    optional_tag_columns=None,
    finalize_mapping=True,
)

Set tag columns and optional tag columns.

Parameters:

Name	Type	Description	Default
`tag_columns`	`list`	A list of ints or strings containing the columns that contain the HED tags. If None, clears existing tag_columns	`None`
`optional_tag_columns`	`list`	A list of ints or strings containing the columns that contain the HED tags, but not an error if missing. If None, clears existing optional_tag_columns.	`None`
`finalize_mapping`	`bool`	Re-generate the internal mapping if True, otherwise no effect until finalize.	`True`

Source code in hed/models/column_mapper.py

def set_tag_columns(self, tag_columns=None, optional_tag_columns=None, finalize_mapping=True):
    """ Set tag columns and optional tag columns.

    Parameters:
        tag_columns (list): A list of ints or strings containing the columns that contain the HED tags.
                            If None, clears existing tag_columns.
        optional_tag_columns (list): A list of ints or strings containing the columns that contain the HED tags,
                                     but not an error if missing.
                                     If None, clears existing optional_tag_columns.
        finalize_mapping (bool): Re-generate the internal mapping if True, otherwise no effect until finalize.
    """
    self._tag_columns = [] if tag_columns is None else tag_columns
    self._optional_tag_columns = [] if optional_tag_columns is None else optional_tag_columns
    if not finalize_mapping:
        return
    self._finalize_mapping()

DefinitionDict¶

DefinitionDict ¶

Gathers definitions from a single source.

Source code in hed/models/definition_dict.py

class DefinitionDict:
    """ Gathers definitions from a single source.

    Notes:
        - Entries live in ``self.defs``. Definitions found by check_for_definitions are stored under
          the casefolded definition name; entries merged from another dict keep the keys they had.
        - Duplicate-definition problems found while merging dicts are collected in ``issues``.
    """

    def __init__(self, def_dicts=None, hed_schema=None):
        """ Definitions to be considered a single source.

        Parameters:
            def_dicts (str or list or DefinitionDict): DefDict or list of DefDicts/strings or
                a single string whose definitions should be added.
            hed_schema (HedSchema or None): Required if passing strings or lists of strings, unused otherwise.

        :raises TypeError:
            - Bad type passed as def_dicts.
        """

        self.defs = {}
        self._issues = []
        if def_dicts:
            self.add_definitions(def_dicts, hed_schema)

    def add_definitions(self, def_dicts, hed_schema=None):
        """ Add definitions from dict(s) or strings(s) to this dict.

        Parameters:
            def_dicts (list, DefinitionDict, dict, or str): DefinitionDict or list of DefinitionDicts/strings/dicts
                                                            whose definitions should be added.
            hed_schema (HedSchema or None): Required if passing strings or lists of strings, unused otherwise.

        Notes:
            - The dict form expects DefinitionEntries in the same form as a DefinitionDict.
            - A str or list of str is parsed using the hed_schema.
            - Types can be mixed, e.g. [DefinitionDict, str, list of str] is valid input.

        :raises TypeError:
            - Bad type passed as def_dicts.
            - A string or list of strings was passed without a hed_schema.
        """
        if not isinstance(def_dicts, list):
            def_dicts = [def_dicts]
        for def_dict in def_dicts:
            if isinstance(def_dict, (DefinitionDict, dict)):
                self._add_definitions_from_dict(def_dict)
            elif isinstance(def_dict, (str, list)):
                if not hed_schema:
                    # The type is valid; the schema needed to parse it is what is missing.
                    raise TypeError(f"A hed_schema is required to add definitions from "
                                    f"'{type(def_dict)}' to DefinitionDict")
                definitions = [def_dict] if isinstance(def_dict, str) else def_dict
                for definition in definitions:
                    # NOTE(review): issues returned by check_for_definitions are not kept here.
                    self.check_for_definitions(HedString(definition, hed_schema))
            else:
                raise TypeError(f"Invalid type '{type(def_dict)}' passed to DefinitionDict")

    def _add_definition(self, def_tag, def_value):
        """ Store a single definition, recording an issue instead if the key already exists.

        Parameters:
            def_tag (str): The key to store the definition under.
            def_value (DefinitionEntry): The definition entry to store.
        """
        if def_tag in self.defs:
            # The existing entry wins; the issue is reported against where it was defined.
            error_context = self.defs[def_tag].source_context
            self._issues += ErrorHandler.format_error_from_context(DefinitionErrors.DUPLICATE_DEFINITION,
                error_context=error_context, def_name=def_tag, actual_error=DefinitionErrors.DUPLICATE_DEFINITION)
        else:
            self.defs[def_tag] = def_value

    def _add_definitions_from_dict(self, def_dict):
        """ Add the definitions found in the given definition dictionary to this mapper.

         Parameters:
             def_dict (DefinitionDict or dict): DefDict whose definitions should be added.

        """
        for def_tag, def_value in def_dict.items():
            self._add_definition(def_tag, def_value)

    def get(self, def_name) -> "Union[DefinitionEntry, None]":
        """ Get the definition entry for the definition name.

            Not case-sensitive

        Parameters:
            def_name (str):  Name of the definition to retrieve.

        Returns:
            Union[DefinitionEntry, None]:  Definition entry for the requested definition.
        """
        # The annotation is quoted so it is not evaluated when the class is defined.
        return self.defs.get(def_name.casefold())

    def __iter__(self):
        """ Iterate over the stored definition keys. """
        return iter(self.defs)

    def __len__(self):
        """ Return the number of stored definitions. """
        return len(self.defs)

    def items(self):
        """ Return the dictionary of definitions.

            Alias for .defs.items()

        Returns:
            def_entries({str: DefinitionEntry}): A view of (key, definition entry) pairs.
        """
        return self.defs.items()

    @property
    def issues(self):
        """Return issues about duplicate definitions."""
        return self._issues

    def check_for_definitions(self, hed_string_obj, error_handler=None) -> list[dict]:
        """ Check string for definition tags, adding them to self.

        Parameters:
            hed_string_obj (HedString): A single HED string to gather definitions from.
            error_handler (ErrorHandler or None): Error context used to identify where definitions are found.

        Returns:
            list[dict]:  List of issues encountered in checking for definitions. Each issue is a dictionary.

        Notes:
            - A definition is only stored when every validation stage passes; a definition with
              issues is skipped and its issues are returned.
        """
        def_issues = []
        for definition_tag, group in hed_string_obj.find_top_level_tags(anchor_tags={DefTagNames.DEFINITION_KEY}):
            group_tag, new_def_issues = self._find_group(definition_tag, group, error_handler)
            def_tag_name, def_takes_value = self._strip_value_placeholder(definition_tag.extension)

            # After stripping a trailing "/#", the name itself may contain neither "/" nor "#".
            if "/" in def_tag_name or "#" in def_tag_name:
                new_def_issues += ErrorHandler.format_error_with_context(error_handler,
                                                                         DefinitionErrors.INVALID_DEFINITION_EXTENSION,
                                                                         tag=definition_tag,
                                                                         def_name=def_tag_name)

            if new_def_issues:
                def_issues += new_def_issues
                continue

            new_def_issues = self._validate_contents(definition_tag, group_tag, error_handler)
            new_def_issues += self._validate_placeholders(def_tag_name, group_tag, def_takes_value, error_handler)

            if new_def_issues:
                def_issues += new_def_issues
                continue

            new_def_issues, context = self._validate_name_and_context(def_tag_name, error_handler)
            if new_def_issues:
                def_issues += new_def_issues
                continue

            self.defs[def_tag_name.casefold()] = DefinitionEntry(name=def_tag_name, contents=group_tag,
                                                                 takes_value=def_takes_value,
                                                                 source_context=context)

        return def_issues

    @staticmethod
    def _strip_value_placeholder(def_tag_name):
        """ Split a trailing "/#" off a definition name.

        Parameters:
            def_tag_name (str): The definition name, possibly ending in "/#".

        Returns:
            tuple(str, bool): The name without the trailing "/#", and whether "/#" was present.
        """
        def_takes_value = def_tag_name.endswith("/#")
        if def_takes_value:
            def_tag_name = def_tag_name[:-len("/#")]
        return def_tag_name, def_takes_value

    def _validate_name_and_context(self, def_tag_name, error_handler):
        """ Check that the definition name is not already in use and fetch the error context.

        Parameters:
            def_tag_name (str): The name of the definition without any Definition tag or value.
            error_handler (ErrorHandler or None): Error context used to identify where definitions are found.

        Returns:
            tuple(list, list): Issues found (a duplicate-definition issue if the casefolded name exists),
                               and the error context of the handler or an empty list if there is no handler.
        """
        if error_handler:
            context = error_handler.error_context
        else:
            context = []
        new_def_issues = []
        if def_tag_name.casefold() in self.defs:
            new_def_issues += ErrorHandler.format_error_with_context(error_handler,
                                                                     DefinitionErrors.DUPLICATE_DEFINITION,
                                                                     def_name=def_tag_name)
        return new_def_issues, context

    @staticmethod
    def _validate_placeholders(def_tag_name, group, def_takes_value, error_handler):
        """ Check the definition for the correct placeholders (exactly 1 placeholder when takes value).

        Parameters:
            def_tag_name (str): The name of the definition without any Definition tag or value.
            group (HedGroup): The contents of the definition.
            def_takes_value (bool): True if the definition takes a value (should have #).
            error_handler (ErrorHandler or None): Error context used to identify where definitions are found.

        Returns:
            list:  List of issues encountered in checking for definitions. Each issue is a dictionary.
        """
        new_issues = []
        placeholder_tags = []
        tags_with_issues = []

        # Find the tags that have # in their strings and return issues of count > 1.
        if group:
            for tag in group.get_all_tags():
                count = str(tag).count("#")
                if count:
                    placeholder_tags.append(tag)
                if count > 1:
                    tags_with_issues.append(tag)

        if tags_with_issues:
            new_issues += ErrorHandler.format_error_with_context(error_handler,
                                                                 DefinitionErrors.WRONG_NUMBER_PLACEHOLDER_TAGS,
                                                                 def_name=def_tag_name,
                                                                 tag_list=tags_with_issues,
                                                                 expected_count=1 if def_takes_value else 0)
        # Make sure placeholder count is correct.
        if (len(placeholder_tags) == 1) != def_takes_value:
            new_issues += ErrorHandler.format_error_with_context(error_handler,
                                                                 DefinitionErrors.WRONG_NUMBER_PLACEHOLDER_TAGS,
                                                                 def_name=def_tag_name,
                                                                 tag_list=placeholder_tags,
                                                                 expected_count=1 if def_takes_value else 0)
            return new_issues

        # Make sure that the tag with the placeholder is allowed to take a value.
        if def_takes_value:
            placeholder_tag = placeholder_tags[0]
            if not placeholder_tag.is_takes_value_tag():
                new_issues += ErrorHandler.format_error_with_context(error_handler,
                                                                     DefinitionErrors.PLACEHOLDER_NO_TAKES_VALUE,
                                                                     def_name=def_tag_name,
                                                                     placeholder_tag=placeholder_tag)

        return new_issues

    @staticmethod
    def _find_group(definition_tag, group, error_handler):
        """ Locate the contents group of a definition and check the shape of the definition group.

        Parameters:
            definition_tag (HedTag): The Definition tag itself.
            group (HedGroup): The entire definition group include the Definition tag.
            error_handler (ErrorHandler or None): Error context used to identify where definitions are found.

        Returns:
            tuple(HedGroup or None, list): The first inner group (None if there is none), and the issues
                found: more than one inner group, no inner group although the extension contains "#",
                or a tag count in the definition group other than one.
        """
        # initial validation
        groups = group.groups()
        issues = []
        if len(groups) > 1:
            issues += \
                ErrorHandler.format_error_with_context(error_handler,
                                                       DefinitionErrors.WRONG_NUMBER_GROUPS,
                                                       def_name=definition_tag.extension, tag_list=groups)
        elif len(groups) == 0 and '#' in definition_tag.extension:
            issues += \
                ErrorHandler.format_error_with_context(error_handler,
                                                       DefinitionErrors.NO_DEFINITION_CONTENTS,
                                                       def_name=definition_tag.extension)
        if len(group.tags()) != 1:
            issues += \
                ErrorHandler.format_error_with_context(error_handler,
                                                       DefinitionErrors.WRONG_NUMBER_TAGS,
                                                       def_name=definition_tag.extension,
                                                       tag_list=[tag for tag in group.tags()
                                                                 if tag is not definition_tag])

        group_tag = groups[0] if groups else None

        return group_tag, issues

    @staticmethod
    def _validate_contents(definition_tag, group, error_handler):
        """ Check the definition contents for tags that are not allowed inside a definition.

        Parameters:
            definition_tag (HedTag): The Definition tag itself.
            group (HedGroup or None): The contents of the definition; nothing is checked if falsy.
            error_handler (ErrorHandler or None): Error context used to identify where definitions are found.

        Returns:
            list: Issues for any Def, Def-expand, or Definition tag found in the contents, and for any
                  tag having the unique or required attribute.
        """
        issues = []
        if group:
            def_keys = {DefTagNames.DEF_KEY, DefTagNames.DEF_EXPAND_KEY, DefTagNames.DEFINITION_KEY}
            for def_tag in group.find_tags(def_keys, recursive=True, include_groups=0):
                issues += ErrorHandler.format_error_with_context(error_handler,
                                                                 DefinitionErrors.DEF_TAG_IN_DEFINITION,
                                                                 tag=def_tag,
                                                                 def_name=definition_tag.extension)

            for tag in group.get_all_tags():
                if tag.has_attribute(HedKey.Unique) or tag.has_attribute(HedKey.Required):
                    issues += ErrorHandler.format_error_with_context(error_handler,
                                                                     DefinitionErrors.BAD_PROP_IN_DEFINITION,
                                                                     tag=tag,
                                                                     def_name=definition_tag.extension)

        return issues

    def get_definition_entry(self, def_tag):
        """ Get the entry for a given def tag.

            Does not validate at all.

        Parameters:
            def_tag (HedTag): Source HED tag that may be a Def or Def-expand tag.

        Returns:
            def_entry(DefinitionEntry or None): The definition entry if it exists
        """
        # Only the part of the extension before the first "/" names the definition.
        tag_label, _, _ = def_tag.extension.partition('/')
        return self.defs.get(tag_label.casefold())

    def _get_definition_contents(self, def_tag):
        """ Get the contents for a given def tag.

            Does not validate at all.

        Parameters:
            def_tag (HedTag): Source HED tag that may be a Def or Def-expand tag.

        Returns:
            def_contents: HedGroup
            The contents to replace the previous def-tag with, or None if the definition is unknown.
        """
        def_entry = self.get_definition_entry(def_tag)
        if def_entry is None:
            # Could raise an error here?
            return None

        # Everything after the first "/" is the value to substitute for the placeholder.
        _, _, placeholder = def_tag.extension.partition('/')
        return def_entry.get_definition(def_tag, placeholder_value=placeholder)

    @staticmethod
    def get_as_strings(def_dict) -> dict[str, str]:
        """ Convert the entries to strings of the contents

        Parameters:
            def_dict (dict or DefinitionDict): A dict of definitions

        Returns:
            dict[str,str]: definition name and contents
        """
        if isinstance(def_dict, DefinitionDict):
            def_dict = def_dict.defs

        return {key: str(value.contents) for key, value in def_dict.items()}

issues `property` ¶

issues

Return issues about duplicate definitions.

add_definitions ¶

add_definitions(def_dicts, hed_schema=None)

Add definitions from dict(s) or strings(s) to this dict.

Parameters:

Name	Type	Description	Default
`def_dicts`	`list, DefinitionDict, dict, or str`	DefinitionDict or list of DefinitionDicts/strings/dicts whose definitions should be added.	required
`hed_schema`	`HedSchema or None`	Required if passing strings or lists of strings, unused otherwise.	`None`

Notes: The dict form expects DefinitionEntries in the same form as a DefinitionDict. A str or list of str is parsed using the hed_schema. Types can be mixed, e.g. [DefinitionDict, str, list of str] is valid input.

:raises TypeError: - Bad type passed as def_dicts.

Source code in hed/models/definition_dict.py

def add_definitions(self, def_dicts, hed_schema=None):
    """ Merge definitions from one or more sources into this dict.

    Parameters:
        def_dicts (list, DefinitionDict, dict, or str): A single source or a list of sources. Each source
                                                        may be a DefinitionDict, a dict, a string, or a
                                                        list of strings.
        hed_schema (HedSchema or None): Needed to parse string sources; unused for dict sources.

    Notes:
        - A dict source is expected to hold DefinitionEntries in the same form as a DefinitionDict.
        - String sources (and lists of strings) are parsed into HedStrings using hed_schema.
        - Source types may be mixed, e.g. [DefinitionDict, str, list of str].

    :raises TypeError:
        - A source has an unsupported type, or is a string/list given without a hed_schema.
    """
    # A lone source is treated as a one-element list.
    sources = def_dicts if isinstance(def_dicts, list) else [def_dicts]

    for source in sources:
        if isinstance(source, (DefinitionDict, dict)):
            self._add_definitions_from_dict(source)
            continue

        if isinstance(source, str) and hed_schema:
            self.check_for_definitions(HedString(source, hed_schema))
            continue

        if isinstance(source, list) and hed_schema:
            for text in source:
                self.check_for_definitions(HedString(text, hed_schema))
            continue

        # Reached for unsupported types and for strings/lists supplied without a schema.
        raise TypeError(f"Invalid type '{type(source)}' passed to DefinitionDict")

check_for_definitions ¶

check_for_definitions(
    hed_string_obj, error_handler=None
) -> list[dict]

Check string for definition tags, adding them to self.

Parameters:

Name	Type	Description	Default
`hed_string_obj`	`HedString`	A single HED string to gather definitions from.	required
`error_handler`	`ErrorHandler or None`	Error context used to identify where definitions are found.	`None`

Returns:

Type	Description
`list[dict]`	List of issues encountered in checking for definitions. Each issue is a dictionary.

Source code in hed/models/definition_dict.py

def check_for_definitions(self, hed_string_obj, error_handler=None) -> list[dict]:
    """ Scan a HED string for definition tags and store each valid definition in self.

    Parameters:
        hed_string_obj (HedString): A single HED string to gather definitions from.
        error_handler (ErrorHandler or None): Error context used to identify where definitions are found.

    Returns:
        list[dict]:  Issues found while checking the definitions. Each issue is a dictionary.

    Notes:
        - Checks run in stages; a definition that produces issues at one stage skips the
          later stages and is not stored.
    """
    all_issues = []
    anchors = {DefTagNames.DEFINITION_KEY}
    for def_tag, enclosing_group in hed_string_obj.find_top_level_tags(anchor_tags=anchors):
        # Stage 1: locate the contents group and check the form of the definition name.
        contents_group, issues = self._find_group(def_tag, enclosing_group, error_handler)
        name, takes_value = self._strip_value_placeholder(def_tag.extension)

        if "/" in name or "#" in name:
            issues += ErrorHandler.format_error_with_context(error_handler,
                                                             DefinitionErrors.INVALID_DEFINITION_EXTENSION,
                                                             tag=def_tag,
                                                             def_name=name)

        # Stage 2: validate the contents and placeholders, only if stage 1 was clean.
        if not issues:
            issues = self._validate_contents(def_tag, contents_group, error_handler)
            issues += self._validate_placeholders(name, contents_group, takes_value, error_handler)

        # Stage 3: validate the name and fetch the source context, only if stage 2 was clean.
        if not issues:
            issues, context = self._validate_name_and_context(name, error_handler)

        if issues:
            all_issues += issues
        else:
            # Entries are keyed by the casefolded name.
            self.defs[name.casefold()] = DefinitionEntry(name=name, contents=contents_group,
                                                         takes_value=takes_value,
                                                         source_context=context)

    return all_issues

get ¶

get(def_name) -> Union[DefinitionEntry, None]

Get the definition entry for the definition name.

Not case-sensitive

Parameters:

Name	Type	Description	Default
`def_name`	`str`	Name of the definition to retrieve.	required

Returns:

Type	Description
`Union[DefinitionEntry, None]`	Definition entry for the requested definition, or None if there is no such definition.

Source code in hed/models/definition_dict.py

def get(self, def_name) -> Union[DefinitionEntry, None]:
    """ Return the definition entry stored under a definition name.

        The lookup is not case-sensitive.

    Parameters:
        def_name (str):  Name of the definition to retrieve.

    Returns:
        Union[DefinitionEntry, None]:  The entry for the requested definition, or None if absent.
    """
    lookup_key = def_name.casefold()
    return self.defs.get(lookup_key)

get_as_strings `staticmethod` ¶

get_as_strings(def_dict) -> dict[str, str]

Convert the entries to strings of the contents

Parameters:

Name	Type	Description	Default
`def_dict`	`dict`	A dict of definitions	required

Returns:

Type	Description
`dict[str, str]`	Definition name mapped to the string form of its contents.

Source code in hed/models/definition_dict.py

@staticmethod
def get_as_strings(def_dict) -> dict[str, str]:
    """ Render the contents of each definition entry as a string.

    Parameters:
        def_dict (DefinitionDict or dict): The definitions to convert. A DefinitionDict is
                                           replaced by its underlying defs mapping.

    Returns:
        dict[str,str]: Each key of the input mapped to str() of that entry's contents.
    """
    entries = def_dict.defs if isinstance(def_dict, DefinitionDict) else def_dict

    as_strings = {}
    for name, entry in entries.items():
        as_strings[name] = str(entry.contents)
    return as_strings

get_definition_entry ¶

get_definition_entry(def_tag)

Get the entry for a given def tag.

Does not validate at all.

Parameters:

Name	Type	Description	Default
`def_tag`	`HedTag`	Source HED tag that may be a Def or Def-expand tag.	required

Returns:

Name	Type	Description
`def_entry`	`DefinitionEntry or None`	The definition entry if it exists

Source code in hed/models/definition_dict.py

def get_definition_entry(self, def_tag):
    """ Return the stored entry that a def tag refers to.

        No validation is performed.

    Parameters:
        def_tag (HedTag): Source HED tag that may be a Def or Def-expand tag.

    Returns:
        def_entry(DefinitionEntry or None): The definition entry if it exists
    """
    # Only the part of the extension before the first slash names the definition.
    label = def_tag.extension.partition('/')[0]
    return self.defs.get(label.casefold())

items ¶

items()

Return the (name, entry) pairs of the stored definitions.

Alias for .defs.items()

Returns:

Name	Type	Description
`def_entries`	`{str: DefinitionEntry}`	An items view of (name, DefinitionEntry) pairs.

Source code in hed/models/definition_dict.py

def items(self):
    """ Return the (name, entry) pairs of the stored definitions.

        Alias for .defs.items()

    Returns:
        def_entries({str: DefinitionEntry}): The items view of the underlying defs mapping.
    """
    stored = self.defs
    return stored.items()

Models¶

HedString¶

HedString ¶

is_group property ¶

copy ¶

expand_defs ¶

find_top_level_tags ¶

from_hed_strings classmethod ¶

get_as_original ¶

remove_definitions ¶

remove_refs ¶

shrink_defs ¶

split_hed_string staticmethod ¶

split_into_groups staticmethod ¶

validate ¶

HedTag¶

HedTag ¶

attributes property ¶

base_tag property ¶

default_unit property ¶

expandable property ¶

expanded property ¶

extension property writable ¶

long_tag property ¶

org_base_tag property ¶

org_tag property ¶

schema_namespace property ¶

short_base_tag property writable ¶

short_tag property ¶

tag property writable ¶

unit_classes property ¶

value_classes property ¶

base_tag_has_attribute ¶

casefold ¶

copy ¶

get_stripped_unit_value ¶

get_tag_unit_class_units ¶

has_attribute ¶

is_basic_tag ¶

is_column_ref ¶

is_placeholder ¶

is_takes_value_tag ¶

is_unit_class_tag ¶

is_value_class_tag ¶

lower ¶

replace_placeholder ¶

tag_exists_in_schema ¶

tag_modified ¶

value_as_default_unit ¶

HedGroup¶

HedGroup ¶

is_group property ¶

span property ¶

append ¶

casefold ¶

check_if_in_original ¶

copy ¶

find_def_tags ¶

find_exact_tags ¶

find_placeholder_tag ¶

find_tags ¶

find_tags_with_term ¶

find_wildcard_tags ¶

get_all_groups ¶

get_all_tags ¶

get_as_form ¶

get_as_indented ¶

get_as_long ¶

get_as_short ¶

get_first_group ¶

get_original_hed_string ¶

groups ¶

lower ¶

remove ¶

replace staticmethod ¶

sort ¶

sorted ¶

tags ¶

Sidecar¶

Sidecar ¶

is_group `property` ¶

from_hed_strings `classmethod` ¶

split_hed_string `staticmethod` ¶

split_into_groups `staticmethod` ¶

attributes `property` ¶

base_tag `property` ¶

default_unit `property` ¶

expandable `property` ¶

expanded `property` ¶

extension `property` `writable` ¶

long_tag `property` ¶

org_base_tag `property` ¶

org_tag `property` ¶

schema_namespace `property` ¶

short_base_tag `property` `writable` ¶

short_tag `property` ¶

tag `property` `writable` ¶

unit_classes `property` ¶

value_classes `property` ¶

is_group `property` ¶

span `property` ¶

replace `staticmethod` ¶

all_hed_columns `property` ¶

column_data `property` ¶

def_dict `property` ¶

columns `property` ¶

dataframe `property` ¶

dataframe_a `property` ¶

has_column_names `property` ¶

loaded_workbook `property` ¶

name `property` ¶

needs_sorting `property` ¶

onsets `property` ¶

series_a `property` ¶

series_filtered `property` ¶

worksheet_name `property` ¶

combine_dataframe `staticmethod` ¶

column_prefix_dictionary `property` ¶

sidecar_column_data `property` ¶

tag_columns `property` ¶

check_for_blank_names `staticmethod` ¶

issues `property` ¶

get_as_strings `staticmethod` ¶