Skip to content

Models

Core data models for working with HED data structures.

HedString

HedString

Bases: HedGroup

A HED string with its schema and definitions.

Source code in hed/models/hed_string.py
class HedString(HedGroup):
    """ A HED string with its schema and definitions. """

    OPENING_GROUP_CHARACTER = '('
    CLOSING_GROUP_CHARACTER = ')'

    def __init__(self, hed_string, hed_schema, def_dict=None, _contents=None):
        """ Build a HedString by parsing text or by adopting existing children.

        Parameters:
            hed_string (str): The raw HED text consisting of tags and tag groups.
            hed_schema (HedSchema): The schema used to identify tags.
            def_dict (DefinitionDict or None): The definitions used to identify def/def-expand tags.
            _contents ([HedGroup and/or HedTag] or None): If given, exactly this list becomes the children
                                                          (no copy is made) and hed_string is not parsed.

        Notes:
            - If parsing raises ValueError, the string is created with no children
              rather than propagating the error.

        """
        contents = _contents
        if contents is None:
            try:
                contents = self.split_into_groups(hed_string, hed_schema, def_dict)
            except ValueError:
                # A string that cannot be split still yields an (empty) HedString.
                contents = []

        # The string as a whole spans the entire source text.
        super().__init__(hed_string, contents=contents, startpos=0, endpos=len(hed_string))
        self._schema = hed_schema
        self._from_strings = None
        self._def_dict = def_dict

    @classmethod
    def from_hed_strings(cls, hed_strings):
        """ Combine several HedString objects into a single new HedString.

        Parameters:
            hed_strings (list): The HedString objects to combine.
                                The new string shares (takes ownership of) their children.

        Returns:
            HedString: The combined string; its text is the source texts joined by commas and
                       its schema and definitions come from the first entry.

        :raises TypeError:
            - hed_strings is empty or None.
        """
        if not hed_strings:
            raise TypeError("Passed an empty list to from_hed_strings")

        combined = HedString.__new__(HedString)
        joined_text = ",".join(source._hed_string for source in hed_strings)
        merged_children = []
        for source in hed_strings:
            merged_children.extend(source.children)

        lead = hed_strings[0]
        combined.__init__(hed_string=joined_text, hed_schema=lead._schema, def_dict=lead._def_dict,
                          _contents=merged_children)
        # Remember the sources so original spans can be mapped into the joined text later.
        combined._from_strings = hed_strings
        return combined

    @property
    def is_group(self):
        """ Always False since the underlying string is not a group with parentheses. """
        return False

    def _calculate_to_canonical_forms(self, hed_schema):
        """ Identify all tags using the given schema.

        Parameters:
            hed_schema (HedSchema, HedSchemaGroup): The schema to use to validate/convert tags.

        Returns:
            list: A list of issues found while converting the string. Each issue is a dictionary.

        """
        validation_issues = []
        for tag in self.get_all_tags():
            validation_issues += tag._calculate_to_canonical_forms(hed_schema)

        return validation_issues

    def __deepcopy__(self, memo):
        # check if the object has already been copied
        if id(self) in memo:
            return memo[id(self)]

        # create a new instance of HedString class, and direct copy all parameters
        new_string = self.__class__.__new__(self.__class__)
        new_string.__dict__.update(self.__dict__)

        # add the new object to the memo dictionary
        memo[id(self)] = new_string

        # Deep copy the attributes that need it(most notably, we don't copy schema/schema entry)
        new_string._original_children = copy.deepcopy(self._original_children, memo)
        new_string._from_strings = copy.deepcopy(self._from_strings, memo)
        new_string.children = copy.deepcopy(self.children, memo)

        return new_string

    def copy(self) -> 'HedString':
        """ Return a deep copy of this string.

        Returns:
            HedString: The copied group.

        """
        return_copy = copy.deepcopy(self)
        return return_copy

    def remove_definitions(self):
        """ Remove top-level definition groups from this string.

            This does not validate definitions and will blindly remove invalid ones as well.
        """
        groups_to_drop = self.find_top_level_tags({DefTagNames.DEFINITION_KEY}, include_groups=1)
        if not groups_to_drop:
            return
        self.remove(groups_to_drop)

    def shrink_defs(self) -> 'HedString':
        """ Collapse each def-expand group back into a plain def tag.

            No validation is done, so invalid def-expand groups are shrunk as well.

        Returns:
            HedString: self
        """
        matches = self.find_tags({DefTagNames.DEF_EXPAND_KEY}, recursive=True)
        for expand_tag, expand_group in matches:
            parent = expand_group._parent
            if not parent:
                continue
            # The tag becomes a def tag and takes the group's place under the group's parent.
            expand_tag.short_base_tag = DefTagNames.DEF_KEY
            expand_tag._parent = parent
            parent.replace(expand_group, expand_tag)

        return self

    def expand_defs(self) -> "HedString":
        """ Replace def tags with def-expand tags.

            This does very minimal validation.

        Returns:
            HedString: self
        """
        def_tags = self.find_def_tags(recursive=True, include_groups=0)

        replacements = []
        for tag in def_tags:
            if tag.expandable and not tag.expanded:
                replacements.append((tag, tag.expandable))

        for tag, group in replacements:
            tag_parent = tag._parent
            tag_parent.replace(tag, group)
            tag._parent = group
            tag.short_base_tag = DefTagNames.DEF_EXPAND_KEY

        return self

    def get_as_original(self) -> str:
        """ Return the original form of this string.

        Returns:
            str: The string with all the tags in their original form.

        Notes:
            Potentially with some extraneous spaces removed on returned string.
        """
        return self.get_as_form("org_tag")

    @staticmethod
    def split_into_groups(hed_string, hed_schema, def_dict=None):
        """ Split the HED string into a parse tree.

        Parameters:
            hed_string (str): A HED string consisting of tags and tag groups to be processed.
            hed_schema (HedSchema): HED schema to use to identify tags.
            def_dict (DefinitionDict): The definitions to identify.
        Returns:
            list:  A list of HedTag and/or HedGroup.

        :raises ValueError:
            - The string is significantly malformed, such as mismatched parentheses.

        Notes:
            - The parse tree consists of tag groups, tags, and delimiters.
        """
        current_tag_group = [[]]

        input_tags = HedString.split_hed_string(hed_string)
        for is_hed_tag, (startpos, endpos) in input_tags:
            if is_hed_tag:
                new_tag = HedTag(hed_string, hed_schema, (startpos, endpos), def_dict)
                current_tag_group[-1].append(new_tag)
            else:
                string_portion = hed_string[startpos:endpos]
                delimiter_index = 0
                for i, char in enumerate(string_portion):
                    if not char.isspace():
                        delimiter_index = i
                        break

                delimiter_char = string_portion[delimiter_index]

                if delimiter_char is HedString.OPENING_GROUP_CHARACTER:
                    current_tag_group.append(HedGroup(hed_string, startpos + delimiter_index))

                if delimiter_char is HedString.CLOSING_GROUP_CHARACTER:
                    # Terminate existing group, and save it off.
                    paren_end = startpos + delimiter_index + 1

                    if len(current_tag_group) > 1:
                        new_group = current_tag_group.pop()
                        new_group._endpos = paren_end

                        current_tag_group[-1].append(new_group)
                    else:
                        raise ValueError(f"Closing parentheses in HED string {hed_string}")

        # Comma delimiter issues are ignored and assumed already validated currently.
        if len(current_tag_group) != 1:
            raise ValueError(f"Unmatched opening parentheses in HED string {hed_string}")

        return current_tag_group[0]

    def _get_org_span(self, tag_or_group):
        """ If this tag or group was in the original HED string, find its original span.

        Parameters:
            tag_or_group (HedTag or HedGroup): The HED tag to locate in this string.

        Returns:
            int or None:   Starting position of the given item in the original string.
            int or None:   Ending position of the given item in the original string.

        Notes:
            - If the HED tag or group was not in the original string, returns (None, None).

        """
        if self._from_strings:
            return self._get_org_span_from_strings(tag_or_group)

        if self.check_if_in_original(tag_or_group):
            return tag_or_group.span

        return None, None

    def _get_org_span_from_strings(self, tag_or_group):
        """ A different case of the above, to handle if this was created from HED string objects."""
        found_string = None
        string_start_index = 0
        for string in self._from_strings:
            if string.check_if_in_original(tag_or_group):
                found_string = string
                break
            # Add 1 for comma
            string_start_index += string.span[1] + 1

        if not found_string:
            return None, None

        return tag_or_group.span[0] + string_start_index, tag_or_group.span[1] + string_start_index

    @staticmethod
    def split_hed_string(hed_string) -> list[tuple[bool, tuple[int, int]]]:
        """ Split a HED string into delimiters and tags.

        Parameters:
            hed_string (str): The HED string to split.

        Returns:
            list[tuple[bool, tuple[int, int]]]:  A list of tuples where each tuple is (is_hed_tag, (start_pos, end_pos)).

        Notes:
            - The tuple format is as follows
                - is_hed_tag (bool): A (possible) HED tag if True, delimiter if not.
                - start_pos (int):   Index of start of string in hed_string.
                - end_pos (int):     Index of end of string in hed_string.

            - This function does not validate tags or delimiters in any form.

        """
        tag_delimiters = ",()"
        current_spacing = 0
        found_symbol = True
        result_positions = []
        tag_start_pos = None
        last_end_pos = 0
        for i, char in enumerate(hed_string):
            if char == " ":
                current_spacing += 1
                continue

            if char in tag_delimiters:
                if found_symbol:
                    if last_end_pos != i:
                        result_positions.append((False, (last_end_pos, i)))
                    last_end_pos = i
                elif not found_symbol:
                    found_symbol = True
                    last_end_pos = i - current_spacing
                    result_positions.append((True, (tag_start_pos, last_end_pos)))
                    current_spacing = 0
                    tag_start_pos = None
                continue

            # If we have a current delimiter, end it here.
            if found_symbol and last_end_pos is not None:
                if last_end_pos != i:
                    result_positions.append((False, (last_end_pos, i)))
                last_end_pos = None

            found_symbol = False
            current_spacing = 0
            if tag_start_pos is None:
                tag_start_pos = i

        if last_end_pos is not None and len(hed_string) != last_end_pos:
            result_positions.append((False, (last_end_pos, len(hed_string))))
        if tag_start_pos is not None:
            result_positions.append((True, (tag_start_pos, len(hed_string) - current_spacing)))
            if current_spacing:
                result_positions.append((False, (len(hed_string) - current_spacing, len(hed_string))))

        return result_positions

    def validate(self, allow_placeholders=True, error_handler=None) -> list[dict]:
        """ Run a HedValidator built from this string's schema and definitions over this string.

        Parameters:
            allow_placeholders (bool): Allow placeholders in the string.
            error_handler (ErrorHandler or None): The error handler to use, creates a default one if none passed.
        Returns:
            list[dict]: A list of issues for HED string.
        """
        # Imported here rather than at module level.
        from hed.validator import HedValidator

        return HedValidator(self._schema, def_dicts=self._def_dict).validate(
            self, allow_placeholders=allow_placeholders, error_handler=error_handler)

    def find_top_level_tags(self, anchor_tags, include_groups=2):
        """ Find top level groups with an anchor tag.

            A max of 1 tag located per top level group.

        Parameters:
            anchor_tags (container):  A list/set/etc. of short_base_tags to find groups by.
            include_groups (0, 1 or 2):  Parameter indicating what return values to include.
                If 0: return only tags.
                If 1: return only groups.
                If 2 or any other value: return both.
        Returns:
            list: The returned result depends on include_groups.
        """
        anchor_tags = {tag.casefold() for tag in anchor_tags}
        top_level_tags = []
        for group in self.groups():
            for tag in group.tags():
                if tag.short_base_tag.casefold() in anchor_tags:
                    top_level_tags.append((tag, group))
                    # Only capture a max of 1 per group.  These are implicitly unique.
                    break

        if include_groups == 0 or include_groups == 1:
            return [tag[include_groups] for tag in top_level_tags]
        return top_level_tags

    def remove_refs(self):
        """ Remove any refs(tags contained entirely inside curly braces) from the string.

            This does NOT validate the contents of the curly braces.  This is only relevant when directly
            editing sidecar strings.  Tools will naturally ignore these.
        """
        ref_tags = [tag for tag in self.get_all_tags() if tag.is_column_ref()]
        if ref_tags:
            self.remove(ref_tags)

is_group property

is_group

Always False since the underlying string is not a group with parentheses.

copy

copy() -> 'HedString'

Return a deep copy of this string.

Returns:

Name Type Description
HedString 'HedString'

The copied group.

Source code in hed/models/hed_string.py
def copy(self) -> 'HedString':
    """ Make a deep copy of this string via copy.deepcopy.

    Returns:
        HedString: The copied string.

    """
    return copy.deepcopy(self)

expand_defs

expand_defs() -> 'HedString'

Replace def tags with def-expand tags.

This does very minimal validation.

Returns:

Name Type Description
HedString 'HedString'

self

Source code in hed/models/hed_string.py
def expand_defs(self) -> "HedString":
    """ Swap each expandable def tag for its def-expand group.

        This does very minimal validation.

    Returns:
        HedString: self
    """
    # Collect first, then mutate, so the tree is not edited while it is being searched.
    pending = [(tag, tag.expandable)
               for tag in self.find_def_tags(recursive=True, include_groups=0)
               if tag.expandable and not tag.expanded]

    for tag, expansion in pending:
        tag._parent.replace(tag, expansion)
        tag._parent = expansion
        tag.short_base_tag = DefTagNames.DEF_EXPAND_KEY

    return self

find_top_level_tags

find_top_level_tags(anchor_tags, include_groups=2)

Find top level groups with an anchor tag.

A max of 1 tag located per top level group.

Parameters:

Name Type Description Default
anchor_tags container

A list/set/etc. of short_base_tags to find groups by.

required
include_groups (0, 1 or 2)

Parameter indicating what return values to include. If 0: return only tags. If 1: return only groups. If 2 or any other value: return both.

2

Returns: list: The returned result depends on include_groups.

Source code in hed/models/hed_string.py
def find_top_level_tags(self, anchor_tags, include_groups=2):
    """ Find the top level groups that contain one of the anchor tags.

        At most one tag (the first match) is reported per top level group.

    Parameters:
        anchor_tags (container):  A list/set/etc. of short_base_tags to find groups by (case-insensitive).
        include_groups (0, 1 or 2):  Parameter indicating what return values to include.
            If 0: return only tags.
            If 1: return only groups.
            If 2 or any other value: return both.
    Returns:
        list: Tags, groups, or (tag, group) tuples depending on include_groups.
    """
    wanted = {name.casefold() for name in anchor_tags}
    matches = []
    for group in self.groups():
        # Only the first anchor in each group is captured.  These are implicitly unique.
        anchor = next((tag for tag in group.tags() if tag.short_base_tag.casefold() in wanted), None)
        if anchor is not None:
            matches.append((anchor, group))

    if include_groups in (0, 1):
        return [pair[include_groups] for pair in matches]
    return matches

from_hed_strings classmethod

from_hed_strings(hed_strings)

Factory for creating HedStrings via combination.

Parameters:

Name Type Description Default
hed_strings list or None

A list of HedString objects to combine. This takes ownership of their children.

required

Returns:

Name Type Description
new_string HedString

The newly combined HedString.

Source code in hed/models/hed_string.py
@classmethod
def from_hed_strings(cls, hed_strings):
    """ Combine several HedString objects into a single new HedString.

    Parameters:
        hed_strings (list): The HedString objects to combine.
                            The new string shares (takes ownership of) their children.

    Returns:
        HedString: The combined string; its text is the source texts joined by commas and
                   its schema and definitions come from the first entry.

    :raises TypeError:
        - hed_strings is empty or None.
    """
    if not hed_strings:
        raise TypeError("Passed an empty list to from_hed_strings")

    combined = HedString.__new__(HedString)
    joined_text = ",".join(source._hed_string for source in hed_strings)
    merged_children = []
    for source in hed_strings:
        merged_children.extend(source.children)

    lead = hed_strings[0]
    combined.__init__(hed_string=joined_text, hed_schema=lead._schema, def_dict=lead._def_dict,
                      _contents=merged_children)
    # Remember the sources so original spans can be mapped into the joined text later.
    combined._from_strings = hed_strings
    return combined

get_as_original

get_as_original() -> str

Return the original form of this string.

Returns:

Name Type Description
str str

The string with all the tags in their original form.

Notes

Potentially with some extraneous spaces removed on returned string.

Source code in hed/models/hed_string.py
def get_as_original(self) -> str:
    """ Render this string using each tag's original ("org_tag") form.

    Returns:
        str: The string with all the tags in their original form.

    Notes:
        The result may have some extraneous spaces removed.
    """
    original_form = self.get_as_form("org_tag")
    return original_form

remove_definitions

remove_definitions()

Remove definition tags and groups from this string.

This does not validate definitions and will blindly remove invalid ones as well.

Source code in hed/models/hed_string.py
def remove_definitions(self):
    """ Remove top-level definition groups from this string.

        This does not validate definitions and will blindly remove invalid ones as well.
    """
    groups_to_drop = self.find_top_level_tags({DefTagNames.DEFINITION_KEY}, include_groups=1)
    if not groups_to_drop:
        return
    self.remove(groups_to_drop)

remove_refs

remove_refs()

Remove any refs(tags contained entirely inside curly braces) from the string.

This does NOT validate the contents of the curly braces. This is only relevant when directly editing sidecar strings. Tools will naturally ignore these.

Source code in hed/models/hed_string.py
def remove_refs(self):
    """ Remove every tag that is a column reference (a tag entirely inside curly braces).

        The contents of the curly braces are NOT validated.  This is only relevant when directly
        editing sidecar strings.  Tools will naturally ignore these.
    """
    column_refs = []
    for tag in self.get_all_tags():
        if tag.is_column_ref():
            column_refs.append(tag)

    if column_refs:
        self.remove(column_refs)

shrink_defs

shrink_defs() -> 'HedString'

Replace def-expand tags with def tags.

This does not validate them and will blindly shrink invalid ones as well.

Returns:

Type Description
'HedString'

self

Source code in hed/models/hed_string.py
def shrink_defs(self) -> 'HedString':
    """ Collapse each def-expand group back into a plain def tag.

        No validation is done, so invalid def-expand groups are shrunk as well.

    Returns:
        HedString: self
    """
    matches = self.find_tags({DefTagNames.DEF_EXPAND_KEY}, recursive=True)
    for expand_tag, expand_group in matches:
        parent = expand_group._parent
        if not parent:
            continue
        # The tag becomes a def tag and takes the group's place under the group's parent.
        expand_tag.short_base_tag = DefTagNames.DEF_KEY
        expand_tag._parent = parent
        parent.replace(expand_group, expand_tag)

    return self

split_hed_string staticmethod

split_hed_string(
    hed_string,
) -> list[tuple[bool, tuple[int, int]]]

Split a HED string into delimiters and tags.

Parameters:

Name Type Description Default
hed_string str

The HED string to split.

required

Returns:

Type Description
list[tuple[bool, tuple[int, int]]]

list[tuple[bool, tuple[int, int]]]: A list of tuples where each tuple is (is_hed_tag, (start_pos, end_pos)).

Notes
  • The tuple format is as follows

    • is_hed_tag (bool): A (possible) HED tag if True, delimiter if not.
    • start_pos (int): Index of start of string in hed_string.
    • end_pos (int): Index of end of string in hed_string.
  • This function does not validate tags or delimiters in any form.

Source code in hed/models/hed_string.py
@staticmethod
def split_hed_string(hed_string) -> list[tuple[bool, tuple[int, int]]]:
    """ Break a HED string into alternating tag and delimiter spans.

    Parameters:
        hed_string (str): The HED string to split.

    Returns:
        list[tuple[bool, tuple[int, int]]]:  A list of tuples where each tuple is (is_hed_tag, (start_pos, end_pos)).

    Notes:
        - The tuple format is as follows
            - is_hed_tag (bool): A (possible) HED tag if True, delimiter if not.
            - start_pos (int):   Index of start of string in hed_string.
            - end_pos (int):     Index of end of string in hed_string.

        - Delimiters are comma and parentheses; only the space character counts as spacing.
        - This function does not validate tags or delimiters in any form.

    """
    delimiters = ",()"
    total_length = len(hed_string)
    pieces = []
    in_delimiter = True       # True while scanning delimiters, False while inside a tag.
    pending_spaces = 0        # Spaces seen since the last non-space character.
    tag_begin = None          # Start index of the tag being scanned, if any.
    delimiter_begin = 0       # Start index of the delimiter run being scanned, if any.

    for index, character in enumerate(hed_string):
        if character == " ":
            pending_spaces += 1
            continue

        if character in delimiters:
            if in_delimiter:
                # Each delimiter character closes the delimiter span before it.
                if delimiter_begin != index:
                    pieces.append((False, (delimiter_begin, index)))
                delimiter_begin = index
            else:
                # The tag ends before any spaces that preceded this delimiter.
                in_delimiter = True
                delimiter_begin = index - pending_spaces
                pieces.append((True, (tag_begin, delimiter_begin)))
                pending_spaces = 0
                tag_begin = None
            continue

        # A tag character: if we have a current delimiter, end it here.
        if in_delimiter and delimiter_begin is not None:
            if delimiter_begin != index:
                pieces.append((False, (delimiter_begin, index)))
            delimiter_begin = None

        in_delimiter = False
        pending_spaces = 0
        if tag_begin is None:
            tag_begin = index

    # Flush whatever span was still open at the end of the string.
    if delimiter_begin is not None and total_length != delimiter_begin:
        pieces.append((False, (delimiter_begin, total_length)))
    if tag_begin is not None:
        tag_end = total_length - pending_spaces
        pieces.append((True, (tag_begin, tag_end)))
        if pending_spaces:
            pieces.append((False, (tag_end, total_length)))

    return pieces

split_into_groups staticmethod

split_into_groups(hed_string, hed_schema, def_dict=None)

Split the HED string into a parse tree.

Parameters:

Name Type Description Default
hed_string str

A HED string consisting of tags and tag groups to be processed.

required
hed_schema HedSchema

HED schema to use to identify tags.

required
def_dict DefinitionDict

The definitions to identify.

None

Returns: list: A list of HedTag and/or HedGroup.

:raises ValueError: - The string is significantly malformed, such as mismatched parentheses.

Notes
  • The parse tree consists of tag groups, tags, and delimiters.
Source code in hed/models/hed_string.py
@staticmethod
def split_into_groups(hed_string, hed_schema, def_dict=None):
    """ Split the HED string into a parse tree.

    Parameters:
        hed_string (str): A HED string consisting of tags and tag groups to be processed.
        hed_schema (HedSchema): HED schema to use to identify tags.
        def_dict (DefinitionDict): The definitions to identify.
    Returns:
        list:  A list of HedTag and/or HedGroup.

    :raises ValueError:
        - The string is significantly malformed, such as mismatched parentheses.

    Notes:
        - The parse tree consists of tag groups, tags, and delimiters.
        - Delimiter characters are compared by value (==); identity comparison of strings
          only works by accident of interpreter string caching.
    """
    # Stack of open containers: entry 0 collects top-level items, later entries are open groups.
    current_tag_group = [[]]

    input_tags = HedString.split_hed_string(hed_string)
    for is_hed_tag, (startpos, endpos) in input_tags:
        if is_hed_tag:
            new_tag = HedTag(hed_string, hed_schema, (startpos, endpos), def_dict)
            current_tag_group[-1].append(new_tag)
            continue

        string_portion = hed_string[startpos:endpos]
        # Position of the first non-whitespace character (0 if the portion is all whitespace).
        delimiter_index = next((i for i, char in enumerate(string_portion) if not char.isspace()), 0)
        delimiter_char = string_portion[delimiter_index]

        if delimiter_char == HedString.OPENING_GROUP_CHARACTER:
            current_tag_group.append(HedGroup(hed_string, startpos + delimiter_index))
        elif delimiter_char == HedString.CLOSING_GROUP_CHARACTER:
            if len(current_tag_group) <= 1:
                # A closing parenthesis with no group open.
                raise ValueError(f"Closing parentheses in HED string {hed_string}")

            # Terminate the innermost open group and attach it to its container.
            new_group = current_tag_group.pop()
            new_group._endpos = startpos + delimiter_index + 1
            current_tag_group[-1].append(new_group)

    # Comma delimiter issues are ignored and assumed already validated currently.
    if len(current_tag_group) != 1:
        raise ValueError(f"Unmatched opening parentheses in HED string {hed_string}")

    return current_tag_group[0]

validate

validate(
    allow_placeholders=True, error_handler=None
) -> list[dict]

Validate the string using the schema.

Parameters:

Name Type Description Default
allow_placeholders bool

Allow placeholders in the string.

True
error_handler ErrorHandler or None

The error handler to use, creates a default one if none passed.

None

Returns: list[dict]: A list of issues for HED string.

Source code in hed/models/hed_string.py
def validate(self, allow_placeholders=True, error_handler=None) -> list[dict]:
    """ Run a HedValidator built from this string's schema and definitions over this string.

    Parameters:
        allow_placeholders (bool): Allow placeholders in the string.
        error_handler (ErrorHandler or None): The error handler to use, creates a default one if none passed.
    Returns:
        list[dict]: A list of issues for HED string.
    """
    # Imported here rather than at module level.
    from hed.validator import HedValidator

    return HedValidator(self._schema, def_dicts=self._def_dict).validate(
        self, allow_placeholders=allow_placeholders, error_handler=error_handler)

HedTag

HedTag

A single HED tag.

Notes
  • HedTag is a smart class in that it keeps track of its original value and positioning as well as pointers to the relevant HED schema information, if relevant.
Source code in hed/models/hed_tag.py
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
class HedTag:
    """ A single HED tag.

    Notes:
        - HedTag is a smart class in that it keeps track of its original value and positioning
          as well as pointers to the relevant HED schema information, if relevant.

    """

    def __init__(self, hed_string, hed_schema, span=None, def_dict=None):
        """ Create a HedTag from a slice of a HED string.

        Parameters:
            hed_string (str): Source HED string for this tag.
            hed_schema (HedSchema): A parameter for calculating canonical forms on creation.
            span  (int, int): The start and end indexes of the tag in the hed_string.
                              If None, the span covers the whole hed_string.
            def_dict (DefinitionDict or None): The def dict to use to identify def/def expand tags.

        Notes:
            - The schema lookup happens during construction, so the schema entry, tag terms and
              extension value are already populated when the constructor returns.
        """
        self._hed_string = hed_string
        if span is None:
            # No span given: the tag is the entire source string.
            span = (0, len(hed_string))
        # This is the span into the original HED string for this tag
        self.span = span

        # If this is present, use this as the org tag for most purposes.
        # This is not generally used anymore, but you can use it to replace a tag in place.
        self._tag = None

        # org_tag only needs _hed_string and span, both of which are set above.
        self._namespace = self._get_schema_namespace(self.org_tag)

        # This is the schema this tag was converted to.
        self._schema = None
        self._schema_entry = None

        # Stored with its leading slash; the extension property strips it.
        self._extension_value = ""
        self._parent = None

        # Lazily filled by the expandable property.
        self._expandable = None
        self._expanded = False

        self.tag_terms = None  # tuple of all the terms in this tag Lowercase.
        # Fills in _schema, _schema_entry, tag_terms and (if any) _extension_value.
        self._calculate_to_canonical_forms(hed_schema)

        self._def_entry = None
        if def_dict:
            # Only Def/Def-expand tags get a definition entry looked up.
            if self.short_base_tag in {DefTagNames.DEF_KEY, DefTagNames.DEF_EXPAND_KEY}:
                self._def_entry = def_dict.get_definition_entry(self)

    def copy(self) -> "HedTag":
        """ Return a deep copy of this tag.

        Returns:
            HedTag: The copied group.

        """
        save_parent = self._parent
        self._parent = None
        return_copy = copy.deepcopy(self)
        self._parent = save_parent
        return return_copy

    @property
    def schema_namespace(self) -> str:
        """ Library namespace for this tag if one exists.

        Returns:
            namespace (str): The library namespace, including the colon.

        """
        return self._namespace

    @property
    def short_tag(self) -> str:
        """ Short form including value or extension.

        Returns:
            str: The short form of the tag, including value or extension.

        """
        if self._schema_entry:
            return f"{self._namespace}{self._schema_entry.short_tag_name}{self._extension_value}"

        return str(self)

    @property
    def base_tag(self) -> str:
        """ Long form without value or extension.

        Returns:
            base_tag (str): The long form of the tag, without value or extension.
        """
        if self._schema_entry:
            return self._schema_entry.long_tag_name
        return str(self)

    @property
    def short_base_tag(self) -> str:
        """ Short form without value or extension.

        Returns:
            str: The short non-extension portion of a tag.
                 If the tag has no schema entry, the string form of the tag is returned instead.

        Notes:
            - ParentNodes/Def/DefName would return just "Def".

        """
        if self._schema_entry:
            return self._schema_entry.short_tag_name
        # Unidentified tag: fall back to its string form.
        return str(self)

    @short_base_tag.setter
    def short_base_tag(self, new_tag_val):
        """ Change base tag, leaving extension or value.

        Parameters:
            new_tag_val (str): The new short_base_tag for this tag.

        :raises ValueError:
            - If the tag wasn't already identified.

        Note:
            - Generally this is used to swap def to def-expand.
        """
        if self._schema_entry:
            tag_entry = None
            if self._schema:
                if self.is_takes_value_tag():
                    new_tag_val = new_tag_val + "/#"
                tag_entry = self._schema.get_tag_entry(new_tag_val, schema_namespace=self.schema_namespace)

            self._schema_entry = tag_entry
        else:
            raise ValueError("Cannot set unidentified tags")

    @property
    def org_base_tag(self) -> str:
        """ Original form without value or extension.

        Returns:
            str: The original form of the tag, without value or extension.

        Notes:
            - Warning: This could be empty if the original tag had a name_prefix prepended.
              e.g. a column where "Label/" is prepended, thus the column value has zero base portion.
        """
        if self._schema_entry:
            extension_len = len(self._extension_value)
            if not extension_len:
                return self.tag

            org_len = len(self.tag)
            if org_len == extension_len:
                return ""

            return self.tag[:org_len - extension_len]
        return str(self)

    def tag_modified(self) -> bool:
        """ Return True if tag has been modified from original.

        Returns:
            bool: Return True if the tag is modified.

        Notes:
            - Modifications can include adding a column name_prefix.

        """
        return bool(self._tag)

    @property
    def tag(self) -> str:
        """ Returns the tag.

            Returns the original tag if no user form set.

        Returns:
            str: The custom set user form of the tag.

        """
        if self._tag:
            return self._tag

        return self.org_tag

    @tag.setter
    def tag(self, new_tag_val):
        """ Allow you to overwrite the tag output text.

        Parameters:
            new_tag_val (str): New (implicitly long form) of tag to set.

        Notes:
            - You probably don't actually want to call this.
        """
        self._tag = new_tag_val
        self._schema_entry = None
        self._calculate_to_canonical_forms(self._schema)

    @property
    def extension(self) -> str:
        """ Get the extension or value of tag.

            Generally this is just the portion after the last slash.
            Returns an empty string if no extension or value.

        Returns:
            str: The tag name.

        Notes:
            - This tag must have been computed first.

        """
        if self._extension_value:
            return self._extension_value[1:]

        return ""

    @extension.setter
    def extension(self, x):
        self._extension_value = f"/{x}"

    @property
    def long_tag(self) -> str:
        """ Long form including value or extension.

        Returns:
            str: The long form of this tag.

        """
        if self._schema_entry:
            return f"{self._namespace}{self._schema_entry.long_tag_name}{self._extension_value}"
        return str(self)

    @property
    def org_tag(self) -> str:
        """ Return the original unmodified tag.

        Returns:
            str: The original unmodified tag.

        """
        return self._hed_string[self.span[0]:self.span[1]]

    @property
    def expanded(self) -> bool:
        """Return if this is currently expanded or not.

           Will always be False unless expandable is set.  This is primarily used for Def/Def-expand tags at present.

        Returns:
            bool: True if this is currently expanded.
        """
        return self._expanded

    @property
    def expandable(self) -> Union["HedGroup", "HedTag", None]:
        """Return what this expands to.

           This is primarily used for Def/Def-expand tags at present.

           Lazily set the first time it's called. The result is only computed when the tag
           has a definition entry, and it is cached once the definition lookup returns
           something other than None.

        Returns:
            Union[HedGroup,HedTag,None]: Returns the expanded form of this tag.
        """
        if self._expandable is None and self._def_entry:
            # NOTE(review): the parent is saved and restored around get_definition;
            # presumably that call can change self._parent -- confirm.
            save_parent = self._parent
            # The extension is split at the first slash; only the part after it
            # (the placeholder value) is used below.
            tag_label, _, placeholder = self.extension.partition('/')

            def_contents = self._def_entry.get_definition(self, placeholder_value=placeholder)
            self._parent = save_parent
            if def_contents is not None:
                self._expandable = def_contents
                # Expanded only if this tag is itself a Def-expand tag.
                self._expanded = self.short_base_tag == DefTagNames.DEF_EXPAND_KEY
        return self._expandable

    def is_column_ref(self) -> bool:
        """ Return if this tag is a column reference from a sidecar.

            You should only see these if you are directly accessing sidecar strings, tools should remove them otherwise.

        Returns:
            bool: Returns True if this is a column ref.
        """
        return self.org_tag.startswith('{') and self.org_tag.endswith('}')

    def __str__(self) -> str:
        """ Convert this HedTag to a string.

        Returns:
            str: The original tag if we haven't set a new tag.(e.g. short to long).

        """
        if self._schema_entry:
            return self.short_tag

        if self._tag:
            return self._tag

        return self._hed_string[self.span[0]:self.span[1]]

    def lower(self) -> str:
        """ Convenience function, equivalent to str(self).lower(). """
        return str(self).lower()

    def casefold(self) -> str:
        """ Convenience function, equivalent to str(self).casefold(). """
        return str(self).casefold()

    def _calculate_to_canonical_forms(self, hed_schema) -> list:
        """ Update internal state based on schema.

        Parameters:
            hed_schema (HedSchema or HedSchemaGroup): The schema to use to validate this tag.

        Returns:
            list:  A list of issues found during conversion. Each element is a dictionary.

        """
        tag_entry, remainder, tag_issues = hed_schema.find_tag_entry(self, self.schema_namespace)
        self._schema_entry = tag_entry
        self._schema = hed_schema
        if self._schema_entry:
            self.tag_terms = self._schema_entry.tag_terms
            if remainder:
                self._extension_value = remainder
        else:
            self.tag_terms = tuple()

        return tag_issues

    def get_stripped_unit_value(self, extension_text) -> tuple[Union[str, None], Union[str, None]]:
        """ Return the extension divided into value and units, if the units are valid.

        Parameters:
            extension_text (str): The text to split, in case it's a portion of a tag.

        Returns:
            str or None: The extension portion with the units removed or None if invalid units.
            str or None: The units or None if no units of the right unit class are found.

        Examples:
            'Duration/3 ms' will return ('3', 'ms')

        """
        tag_unit_classes = self.unit_classes
        stripped_value, units, match = HedTag._get_tag_units_portion(extension_text, tag_unit_classes)
        if stripped_value and match:
            return stripped_value, units
        elif units and not match:
            return None, units
        return extension_text, None

    def value_as_default_unit(self) -> Union[float, None]:
        """ Return the value converted to default units if possible or None if invalid.

        Returns:
            float or None: The extension value in default units.
                                   If no default units it assumes that the extension value is in default units.

        Examples:
            'Duration/300 ms' will return .3

        """
        tag_unit_classes = self.unit_classes
        stripped_value, unit, unit_entry = HedTag._get_tag_units_portion(self.extension, tag_unit_classes)
        if not stripped_value:
            return None
        if unit and not unit_entry:
            return None
        if unit and unit_entry and unit_entry.get_conversion_factor(unit) is not None:
            return float(stripped_value) * unit_entry.get_conversion_factor(unit)
        return float(stripped_value)

    @property
    def unit_classes(self) -> dict:
        """ Return a dict of all the unit classes this tag accepts.

        Returns:
            dict:  A dict of unit classes this tag accepts.

        Notes:
            - Returns empty dict if this is not a unit class tag.
            - The dictionary has unit name as the key and HedSchemaEntry as value.

        """
        if self._schema_entry:
            return self._schema_entry.unit_classes
        return {}

    @property
    def value_classes(self) -> dict:
        """ Return a dict of all the value classes this tag accepts.

        Returns:
            dict: A dictionary of HedSchemaEntry value classes this tag accepts.

        Notes:
            - Returns empty dict if this tag has no schema entry.
            - NOTE(review): the dictionary is presumably keyed by value class name -- confirm
              against the schema entry implementation.

        """
        if self._schema_entry:
            return self._schema_entry.value_classes
        return {}

    @property
    def attributes(self) -> dict:
        """ Return a dict of all the attributes this tag has.

            Returns empty dict if this tag has no schema entry.

        Returns:
            dict: A dict of attributes this tag has.

        Notes:
            - The dictionary is the attributes mapping of the tag's schema entry.

        """
        if self._schema_entry:
            return self._schema_entry.attributes
        return {}

    def tag_exists_in_schema(self) -> bool:
        """ Return whether the schema entry for this tag exists.

        Returns:
            bool: True if this tag exists.

        Notes:
            - This does NOT assure this is a valid tag.
        """
        return bool(self._schema_entry)

    def is_takes_value_tag(self) -> bool:
        """ Return True if this is a takes value tag.

        Returns:
            bool: True if this is a takes value tag.

        """
        if self._schema_entry:
            return self._schema_entry.has_attribute(HedKey.TakesValue)
        return False

    def is_unit_class_tag(self) -> bool:
        """ Return True if this is a unit class tag.

        Returns:
            bool: True if this is a unit class tag.

        """
        if self._schema_entry:
            return bool(self._schema_entry.unit_classes)
        return False

    def is_value_class_tag(self) -> bool:
        """ Return True if this is a value class tag.

        Returns:
            bool:  True if this is a tag with a value class.

        """
        if self._schema_entry:
            return bool(self._schema_entry.value_classes)
        return False

    def is_basic_tag(self) -> bool:
        """  Return True if a known tag with no extension or value.

        Returns:
            bool:  True if this is a known tag without extension or value.

        """
        return bool(self._schema_entry and not self.extension)

    def has_attribute(self, attribute) -> bool:
        """ Return True if this is an attribute this tag has.

        Parameters:
            attribute (str): Name of the attribute.

        Returns:
            bool: True if this tag has the attribute.

        """
        if self._schema_entry:
            return self._schema_entry.has_attribute(attribute)
        return False

    def get_tag_unit_class_units(self) -> list:
        """ Get the unit class units associated with a particular tag.

        Returns:
            list: A list containing the unit class units associated with a particular tag or an empty list.

        """
        units = []
        unit_classes = self.unit_classes
        for unit_class_entry in unit_classes.values():
            units += unit_class_entry.units.keys()

        return units

    @property
    def default_unit(self):
        """ Get the default unit class unit for this tag.

            Only a tag with a single unit class can have default units.

        Returns:
            unit(UnitEntry or None): the default unit entry for this tag, or None
        """
        # todo: Make this cached
        unit_classes = self.unit_classes.values()
        if len(unit_classes) == 1:
            first_unit_class_entry = list(unit_classes)[0]
            default_unit = first_unit_class_entry.has_attribute(HedKey.DefaultUnits, return_value=True)
            return first_unit_class_entry.units.get(default_unit, None)

    def base_tag_has_attribute(self, tag_attribute) -> bool:
        """ Check to see if the tag has a specific attribute.

            This is primarily used to check for things like TopLevelTag on Definitions and similar.

        Parameters:
            tag_attribute (str): A tag attribute.

        Returns:
            bool: True if the tag has the specified attribute. False, if otherwise.

        """
        if not self._schema_entry:
            return False

        return self._schema_entry.base_tag_has_attribute(tag_attribute)

    @staticmethod
    def _get_schema_namespace(org_tag) -> str:
        """ Finds the library namespace for the tag.

        Parameters:
            org_tag (str): A string representing a tag.

        Returns:
            str: Library namespace string or empty.

        """
        first_slash = org_tag.find("/")
        first_colon = org_tag.find(":")

        if first_colon != -1:
            if first_slash != -1 and first_colon > first_slash:
                return ""

            return org_tag[:first_colon + 1]
        return ""

    @staticmethod
    def _get_tag_units_portion(extension_text, tag_unit_classes):
        """ Split a value portion into value, units and its valid unitEntry (if any).

        Parameters:
            extension_text (str): A string representing the value portion of a tag with unit classes.
            tag_unit_classes (dict): Dictionary of valid UnitClassEntry objects for this tag.

        Returns:
            stripped_value (str or None): The value with the units removed.
                                          This is filled in if there are no units as well.
            units (str or None); The units string or None if no units.
            unitEntry (UnitEntry or None): The matching unit entry if one is found

        Notes:
            value, None, None  -- value portion has no units.
            value, units, unitEntry -- value portion has value and valid units.
            value, units, None -- value portion has a value and invalid units.

        """
        value, _, units = extension_text.partition(" ")
        if not units:
            return value, None, None

        for unit_class_entry in tag_unit_classes.values():
            possible_match = unit_class_entry.get_derivative_unit_entry(units)
            if possible_match:
                return value, units, possible_match
        return value, units, None

    def is_placeholder(self) -> bool:
        """Returns if this tag has a placeholder in it.

        Returns:
            has_placeholder(bool): True if it has a placeholder
        """
        if "#" in self.org_tag or "#" in self._extension_value:
            return True
        return False

    def replace_placeholder(self, placeholder_value):
        """ If tag has a placeholder character(#), replace with value.

        Does nothing when the tag has no placeholder. Every '#' in the tag text is replaced.

        Parameters:
            placeholder_value (str): Value to replace placeholder with.

        """
        if self.is_placeholder():
            if self._schema_entry:
                # Compute the new text before touching any state.
                tag = self.tag.replace('#', placeholder_value)
                self._extension_value = self._extension_value.replace("#", placeholder_value)
                # Assigning through the tag setter clears the schema entry and
                # recalculates the canonical forms against the stored schema.
                self.tag = tag
            else:
                # Unidentified tag: just overwrite the stored text, with no schema lookup.
                self._tag = self.tag.replace("#", placeholder_value)

    def get_normalized_str(self) -> str:
        """ Return a case-folded form of this tag, used as the basis for hashing.

        Returns:
            str: For a tag with a schema entry, the namespace (as stored) followed by the
                 case-folded short tag name and case-folded extension value.
                 Otherwise the case-folded string form of the tag.

        """
        if self._schema_entry:
            return self._namespace + self._schema_entry.short_tag_name.casefold() + self._extension_value.casefold()
        else:
            return self.casefold()

    def __hash__(self):
        return hash(self.get_normalized_str())

    def __eq__(self, other):
        if self is other:
            return True

        if isinstance(other, str):
            return self.casefold() == other.casefold()

        if not isinstance(other, HedTag):
            return False

        if self.short_tag == other.short_tag:
            return True

        if self.org_tag.casefold() == other.org_tag.casefold():
            return True
        return False

    def __deepcopy__(self, memo):
        """ Deep copy hook used by copy.deepcopy.

        All attributes are first shared with the new tag; only the parent, the
        expandable contents and the expanded flag are then deep copied.

        Parameters:
            memo (dict): The deepcopy memo of already-copied objects, keyed by id here.

        Returns:
            HedTag: The new tag, or the previously made copy if this tag is already in memo.

        """
        # Check if the object has already been copied.
        if id(self) in memo:
            return memo[id(self)]

        # create a new instance of HedTag class
        # (__new__ is used so __init__ and its schema lookup are not run again)
        new_tag = self.__class__.__new__(self.__class__)
        new_tag.__dict__.update(self.__dict__)

        # add the new object to the memo dictionary
        # (done before the nested copies below so references back to this tag reuse new_tag)
        memo[id(self)] = new_tag

        # Deep copy the attributes that need it(most notably, we don't copy schema/schema entry)
        new_tag._parent = copy.deepcopy(self._parent, memo)
        new_tag._expandable = copy.deepcopy(self._expandable, memo)
        new_tag._expanded = copy.deepcopy(self._expanded, memo)

        return new_tag

attributes property

attributes: dict

Return a dict of all the attributes this tag has.

Returns an empty dict if this tag has no schema entry.

Returns:

Name Type Description
dict dict

A dict of attributes this tag has.

Notes
  • Returns an empty dict if the tag has no schema entry.
  • The dictionary is the attributes mapping of the tag's schema entry.

base_tag property

base_tag: str

Long form without value or extension.

Returns:

Name Type Description
base_tag str

The long form of the tag, without value or extension.

default_unit property

default_unit

Get the default unit class unit for this tag.

Only a tag with a single unit class can have default units.

Returns:

Name Type Description
unit UnitEntry or None

the default unit entry for this tag, or None

expandable property

expandable: Union['HedGroup', 'HedTag', None]

Return what this expands to.

This is primarily used for Def/Def-expand tags at present.

Lazily set the first time it's called.

Returns:

Type Description
Union['HedGroup', 'HedTag', None]

Union[HedGroup,HedTag,None]: Returns the expanded form of this tag.

expanded property

expanded: bool

Return if this is currently expanded or not.

Will always be False unless expandable is set. This is primarily used for Def/Def-expand tags at present.

Returns:

Name Type Description
bool bool

True if this is currently expanded.

extension property writable

extension: str

Get the extension or value of tag.

Generally this is just the portion after the last slash.
Returns an empty string if no extension or value.

Returns:

Name Type Description
str str

The extension or value of the tag (without the leading slash), or an empty string if there is none.

Notes
  • This tag must have been computed first.

long_tag property

long_tag: str

Long form including value or extension.

Returns:

Name Type Description
str str

The long form of this tag.

org_base_tag property

org_base_tag: str

Original form without value or extension.

Returns:

Name Type Description
str str

The original form of the tag, without value or extension.

Notes
  • Warning: This could be empty if the original tag had a name_prefix prepended. e.g. a column where "Label/" is prepended, thus the column value has zero base portion.

org_tag property

org_tag: str

Return the original unmodified tag.

Returns:

Name Type Description
str str

The original unmodified tag.

schema_namespace property

schema_namespace: str

Library namespace for this tag if one exists.

Returns:

Name Type Description
namespace str

The library namespace, including the colon.

short_base_tag property writable

short_base_tag: str

Short form without value or extension.

Returns:

Name Type Description
str str

The short non-extension portion of a tag.

Notes
  • ParentNodes/Def/DefName would return just "Def".

short_tag property

short_tag: str

Short form including value or extension.

Returns:

Name Type Description
str str

The short form of the tag, including value or extension.

tag property writable

tag: str

Returns the tag.

Returns the original tag if no user form set.

Returns:

Name Type Description
str str

The custom set user form of the tag.

unit_classes property

unit_classes: dict

Return a dict of all the unit classes this tag accepts.

Returns:

Name Type Description
dict dict

A dict of unit classes this tag accepts.

Notes
  • Returns empty dict if this is not a unit class tag.
  • The dictionary has unit name as the key and HedSchemaEntry as value.

value_classes property

value_classes: dict

Return a dict of all the value classes this tag accepts.

Returns:

Name Type Description
dict dict

A dictionary of HedSchemaEntry value classes this tag accepts.

Notes
  • Returns empty dict if this is not a value class.
  • The dictionary has unit name as the key and HedSchemaEntry as value.

base_tag_has_attribute

base_tag_has_attribute(tag_attribute) -> bool

Check to see if the tag has a specific attribute.

This is primarily used to check for things like TopLevelTag on Definitions and similar.

Parameters:

Name Type Description Default
tag_attribute str

A tag attribute.

required

Returns:

Name Type Description
bool bool

True if the tag has the specified attribute. False, if otherwise.

Source code in hed/models/hed_tag.py
def base_tag_has_attribute(self, tag_attribute) -> bool:
    """ Check to see if the tag has a specific attribute.

        This is primarily used to check for things like TopLevelTag on Definitions and similar.

    Parameters:
        tag_attribute (str): A tag attribute.

    Returns:
        bool: True if the tag has the specified attribute. False, if otherwise.

    """
    if not self._schema_entry:
        return False

    return self._schema_entry.base_tag_has_attribute(tag_attribute)

casefold

casefold() -> str

Convenience function, equivalent to str(self).casefold().

Source code in hed/models/hed_tag.py
def casefold(self) -> str:
    """ Convenience function, equivalent to str(self).casefold(). """
    return str(self).casefold()

copy

copy() -> 'HedTag'

Return a deep copy of this tag.

Returns:

Name Type Description
HedTag 'HedTag'

The copied tag.

Source code in hed/models/hed_tag.py
def copy(self) -> "HedTag":
    """ Return a deep copy of this tag.

    Returns:
        HedTag: The copied group.

    """
    save_parent = self._parent
    self._parent = None
    return_copy = copy.deepcopy(self)
    self._parent = save_parent
    return return_copy

get_stripped_unit_value

get_stripped_unit_value(
    extension_text,
) -> tuple[Union[str, None], Union[str, None]]

Return the extension divided into value and units, if the units are valid.

Parameters:

Name Type Description Default
extension_text str

The text to split, in case it's a portion of a tag.

required

Returns:

Type Description
Union[str, None]

str or None: The extension portion with the units removed or None if invalid units.

Union[str, None]

str or None: The units or None if no units of the right unit class are found.

Examples:

'Duration/3 ms' will return ('3', 'ms')

Source code in hed/models/hed_tag.py
def get_stripped_unit_value(self, extension_text) -> tuple[Union[str, None], Union[str, None]]:
    """ Return the extension divided into value and units, if the units are valid.

    Parameters:
        extension_text (str): The text to split, in case it's a portion of a tag.

    Returns:
        str or None: The extension portion with the units removed or None if invalid units.
        str or None: The units or None if no units of the right unit class are found.

    Examples:
        'Duration/3 ms' will return ('3', 'ms')

    """
    tag_unit_classes = self.unit_classes
    stripped_value, units, match = HedTag._get_tag_units_portion(extension_text, tag_unit_classes)
    if stripped_value and match:
        return stripped_value, units
    elif units and not match:
        return None, units
    return extension_text, None

get_tag_unit_class_units

get_tag_unit_class_units() -> list

Get the unit class units associated with a particular tag.

Returns:

Name Type Description
list list

A list containing the unit class units associated with a particular tag or an empty list.

Source code in hed/models/hed_tag.py
def get_tag_unit_class_units(self) -> list:
    """ Collect the unit names from every unit class this tag accepts.

    Returns:
        list: A list containing the unit class units associated with a particular tag or an empty list.

    """
    return [
        unit_name
        for class_entry in self.unit_classes.values()
        for unit_name in class_entry.units.keys()
    ]

has_attribute

has_attribute(attribute) -> bool

Return True if this is an attribute this tag has.

Parameters:

Name Type Description Default
attribute str

Name of the attribute.

required

Returns:

Name Type Description
bool bool

True if this tag has the attribute.

Source code in hed/models/hed_tag.py
def has_attribute(self, attribute) -> bool:
    """ Return True if this is an attribute this tag has.

    Parameters:
        attribute (str): Name of the attribute.

    Returns:
        bool: True if this tag has the attribute.

    """
    if self._schema_entry:
        return self._schema_entry.has_attribute(attribute)
    return False

is_basic_tag

is_basic_tag() -> bool

Return True if a known tag with no extension or value.

Returns:

Name Type Description
bool bool

True if this is a known tag without extension or value.

Source code in hed/models/hed_tag.py
def is_basic_tag(self) -> bool:
    """  Return True if a known tag with no extension or value.

    Returns:
        bool:  True if this is a known tag without extension or value.

    """
    return bool(self._schema_entry and not self.extension)

is_column_ref

is_column_ref() -> bool

Return if this tag is a column reference from a sidecar.

You should only see these if you are directly accessing sidecar strings, tools should remove them otherwise.

Returns:

Name Type Description
bool bool

Returns True if this is a column ref.

Source code in hed/models/hed_tag.py
def is_column_ref(self) -> bool:
    """ Return if this tag is a column reference from a sidecar.

        You should only see these if you are directly accessing sidecar strings, tools should remove them otherwise.

    Returns:
        bool: Returns True if this is a column ref.
    """
    return self.org_tag.startswith('{') and self.org_tag.endswith('}')

is_placeholder

is_placeholder() -> bool

Returns if this tag has a placeholder in it.

Returns:

Name Type Description
has_placeholder bool

True if it has a placeholder

Source code in hed/models/hed_tag.py
def is_placeholder(self) -> bool:
    """ Return True if this tag contains the placeholder character (#).

    Both the original tag text and the current extension value are checked.

    Returns:
        has_placeholder(bool): True if it has a placeholder
    """
    return "#" in self.org_tag or "#" in self._extension_value

is_takes_value_tag

is_takes_value_tag() -> bool

Return True if this is a takes value tag.

Returns:

Name Type Description
bool bool

True if this is a takes value tag.

Source code in hed/models/hed_tag.py
def is_takes_value_tag(self) -> bool:
    """ Return True if this is a takes value tag.

    Returns:
        bool: True if this is a takes value tag.

    """
    if self._schema_entry:
        return self._schema_entry.has_attribute(HedKey.TakesValue)
    return False

is_unit_class_tag

is_unit_class_tag() -> bool

Return True if this is a unit class tag.

Returns:

Name Type Description
bool bool

True if this is a unit class tag.

Source code in hed/models/hed_tag.py
def is_unit_class_tag(self) -> bool:
    """ Return True if this is a unit class tag.

    Returns:
        bool: True if this is a unit class tag.

    """
    if self._schema_entry:
        return bool(self._schema_entry.unit_classes)
    return False

is_value_class_tag

is_value_class_tag() -> bool

Return True if this is a value class tag.

Returns:

Name Type Description
bool bool

True if this is a tag with a value class.

Source code in hed/models/hed_tag.py
def is_value_class_tag(self) -> bool:
    """ Return True if this is a value class tag.

    Returns:
        bool: True if the tag has a schema entry whose value_classes is non-empty.

    """
    schema_entry = self._schema_entry
    return bool(schema_entry.value_classes) if schema_entry else False

lower

lower() -> str

Convenience function, equivalent to str(self).lower().

Source code in hed/models/hed_tag.py
def lower(self) -> str:
    """ Return the string form of this tag in lowercase (same as str(self).lower()). """
    text = str(self)
    return text.lower()

replace_placeholder

replace_placeholder(placeholder_value)

If tag has a placeholder character(#), replace with value.

Parameters:

Name Type Description Default
placeholder_value str

Value to replace placeholder with.

required
Source code in hed/models/hed_tag.py
def replace_placeholder(self, placeholder_value):
    """ Substitute a value for the placeholder character (#) in this tag.

    Does nothing if the tag has no placeholder.

    Parameters:
        placeholder_value (str): Value to replace placeholder with.

    """
    if not self.is_placeholder():
        return

    if not self._schema_entry:
        # No schema entry: only the stored tag text is rewritten.
        self._tag = self.tag.replace("#", placeholder_value)
        return

    # Compute the new tag text before touching the extension value, then assign through self.tag.
    updated_tag = self.tag.replace('#', placeholder_value)
    self._extension_value = self._extension_value.replace("#", placeholder_value)
    self.tag = updated_tag

tag_exists_in_schema

tag_exists_in_schema() -> bool

Return whether the schema entry for this tag exists.

Returns:

Name Type Description
bool bool

True if this tag exists.

Notes
  • This does NOT assure this is a valid tag.
Source code in hed/models/hed_tag.py
def tag_exists_in_schema(self) -> bool:
    """ Report whether this tag has a schema entry.

    Returns:
        bool: True if a (truthy) schema entry is attached to this tag.

    Notes:
        - This does NOT assure this is a valid tag.
    """
    return True if self._schema_entry else False

tag_modified

tag_modified() -> bool

Return True if tag has been modified from original.

Returns:

Name Type Description
bool bool

Return True if the tag is modified.

Notes
  • Modifications can include adding a column name_prefix.
Source code in hed/models/hed_tag.py
def tag_modified(self) -> bool:
    """ Report whether the tag has been modified from the original.

    Returns:
        bool: True if a modified tag value (self._tag) has been stored and is non-empty.

    Notes:
        - Modifications can include adding a column name_prefix.

    """
    return True if self._tag else False

value_as_default_unit

value_as_default_unit() -> Union[float, None]

Return the value converted to default units if possible or None if invalid.

Returns:

Type Description
Union[float, None]

float or None: The extension value in default units. If no default units it assumes that the extension value is in default units.

Examples:

'Duration/300 ms' will return .3

Source code in hed/models/hed_tag.py
def value_as_default_unit(self) -> Union[float, None]:
    """ Convert the extension value to default units when possible.

    Returns:
        float or None: The extension value in default units, or None if there is no value
                       or the unit has no matching unit entry.
                       If no conversion applies, the value is assumed to already be in default units.

    Examples:
        'Duration/300 ms' will return .3

    """
    unit_classes = self.unit_classes
    stripped_value, unit, unit_entry = HedTag._get_tag_units_portion(self.extension, unit_classes)

    # No numeric portion, or a unit that could not be matched to an entry: nothing to convert.
    if not stripped_value or (unit and not unit_entry):
        return None

    has_known_unit = unit and unit_entry
    if has_known_unit and unit_entry.get_conversion_factor(unit) is not None:
        return float(stripped_value) * unit_entry.get_conversion_factor(unit)
    return float(stripped_value)

HedGroup

HedGroup

A single parenthesized HED string.

Source code in hed/models/hed_group.py
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
class HedGroup:
    """ A single parenthesized HED string. """

    def __init__(self, hed_string="", startpos=None, endpos=None, contents=None):
        """ Return an empty HedGroup object.

        Parameters:
            hed_string (str or None): Source HED string for this group.
            startpos (int or None):   Starting index of group(including parentheses) in hed_string.
            endpos (int or None):     Position after the end (including parentheses) in hed_string.
            contents (list or None):  A list of HedTags and/or HedGroups that will be set as the contents of this group.
                                      Mostly used during definition expansion.
        """
        self._startpos = startpos
        self._endpos = endpos
        self._hed_string = hed_string
        self._parent = None

        if contents:
            self.children = contents
            for child in self.children:
                child._parent = self
        else:
            self.children = []
        self._original_children = self.children

    def append(self, tag_or_group):
        """ Add a tag or group to this group.

        Parameters:
            tag_or_group (HedTag or HedGroup): The new object to add to this group.
        """
        tag_or_group._parent = self
        self.children.append(tag_or_group)

    def check_if_in_original(self, tag_or_group) -> bool:
        """ Check if the tag or group in original string.

        Parameters:
            tag_or_group (HedTag or HedGroup): The HedTag or HedGroup to be looked for in this group.

        Returns:
            bool:  True if in this group.
        """
        node_list = [self]
        final_list = []

        # Using an iterator is worse performance wise here.
        while node_list:
            current_group_or_tag = node_list.pop(0)
            if isinstance(current_group_or_tag, HedGroup):
                node_list = current_group_or_tag._original_children + node_list
            final_list.append(current_group_or_tag)

        return self._check_in_group(tag_or_group, final_list)

    @staticmethod
    def replace(item_to_replace, new_contents):
        """ Replace an existing tag or group.

            Note: This is a static method that relies on the parent attribute of item_to_replace.

        Parameters:
            item_to_replace (HedTag or HedGroup): The item to replace must exist or this will raise an error.
            new_contents (HedTag or HedGroup): Replacement contents.

        :raises KeyError:
            - item_to_replace does not exist.

        :raises AttributeError:
            - item_to_replace has no parent set.
        """
        parent = item_to_replace._parent
        parent._replace(item_to_replace=item_to_replace, new_contents=new_contents)

    def _replace(self, item_to_replace, new_contents):
        """ Replace an existing tag or group.

        Parameters:
            item_to_replace (HedTag or HedGroup): The item to replace must exist and be a direct child,
                                                  or this will raise an error.
            new_contents (HedTag or HedGroup): Replacement contents.

        :raises KeyError:
            - item_to_replace does not exist.
        """
        if self._original_children is self.children:
            self._original_children = self.children.copy()

        for i, child in enumerate(self.children):
            if item_to_replace is child:
                self.children[i] = new_contents
                new_contents._parent = self
                return

        raise KeyError(f"The tag {item_to_replace} not found in the group.")

    def remove(self, items_to_remove: Iterable[Union[HedTag, 'HedGroup']]):
        """ Remove any tags/groups in items_to_remove.

        Parameters:
            items_to_remove (list):  List of HedGroups and/or HedTags to remove by identity.

        Notes:
            - Any groups that become empty will also be pruned.
            - If you pass a child and parent group, the child will also be removed from the parent.
        """
        empty_groups = []
        # Filter out duplicates
        items_to_remove = {id(item): item for item in items_to_remove}.values()

        for item in items_to_remove:
            group = item._parent
            if group._original_children is group.children:
                group._original_children = group.children.copy()

            group.children.remove(item)
            if not group.children and group is not self:
                empty_groups.append(group)

        if empty_groups:
            self.remove(empty_groups)

        # Do this last to avoid confusing typing
        for item in items_to_remove:
            item._parent = None

    def __copy__(self):
        raise ValueError("Cannot make shallow copies of HedGroups")

    def copy(self) -> "HedGroup":
        """ Return a deep copy of this group.

        Returns:
            HedGroup: The copied group.

        """
        save_parent = self._parent
        self._parent = None
        return_copy = copy.deepcopy(self)
        self._parent = save_parent
        return return_copy

    def sort(self):
        """ Sort the tags and groups in this HedString in a consistent order."""
        self._sorted(update_self=True)

    def sorted(self) -> "HedGroup":
        """ Return a sorted copy of this HED group

        Returns:
            sorted_copy (HedGroup): The sorted copy.
        """
        string_copy = self.copy()
        string_copy._sorted(update_self=True)
        return string_copy

    def _sorted(self, update_self=False) -> list:
        """ Return a sorted copy of this HED group as a list of it's children.

        Parameters:
            update_self (bool): If True, update the contents of this group to be sorted as well.

        Returns:
            list: The list of all tags in this group, with subgroups being returned as further nested lists.
        """
        tag_list = []
        group_list = []
        queue_list = list(self.children)
        for child in queue_list:
            if isinstance(child, HedTag):
                tag_list.append((child, child))
            else:
                group_list.append((child, child._sorted(update_self)))

        tag_list.sort(key=lambda x: str(x[0]))
        group_list.sort(key=lambda x: str(x[0]))
        output_list = tag_list + group_list
        if update_self:
            self.children = [x[0] for x in output_list]
        return [x[1] for x in output_list]

    @property
    def is_group(self):
        """ True if this is a parenthesized group. """
        return True

    def get_all_tags(self) -> list:
        """ Return HedTags, including descendants.

        Returns:
            list:  A list of all the tags in this group including descendants.

        """
        node_list = [self]
        final_list = []

        # Using an iterator is worse performance wise here.
        while node_list:
            current_group_or_tag = node_list.pop(0)
            if isinstance(current_group_or_tag, HedGroup):
                node_list = list(current_group_or_tag.children) + node_list
            else:
                final_list.append(current_group_or_tag)
        return final_list

    def get_all_groups(self, also_return_depth=False) -> list:
        """ Return HedGroups, including descendants and self.

        Parameters:
            also_return_depth (bool): If True, yield tuples (group, depth) rather than just groups.

        Returns:
            list: The list of all HedGroups in this group, including descendants and self.

        """
        node_list = [self]
        final_list = []

        # Using an iterator is worse performance wise here.
        while node_list:
            current_group_or_tag = node_list.pop(0)
            if isinstance(current_group_or_tag, HedGroup):
                node_list = list(current_group_or_tag.children) + node_list
                final_list.append(current_group_or_tag)

        if also_return_depth:
            top_groups = self.groups()

            final_list = [(group, self._check_in_group(group, top_groups)) for group in final_list]
        return final_list

    @staticmethod
    def _check_in_group(group, group_list) -> bool:
        """ Return True if the group is list.

        Parameters:
            group (HedGroup): The group to check for.
            group_list (list):    A list of groups to search.

        Returns:
            bool: True if group is in the group list.

        """
        for val in group_list:
            if val is group:
                return True
        return False

    def tags(self) -> list:
        """ Return the direct child tags of this group.

        Returns:
            list: All tags directly in this group, filtering out HedGroup children.

        """
        return [tag for tag in self.children if isinstance(tag, HedTag)]

    def groups(self) -> list:
        """ Return the direct child groups of this group.

        Returns:
            list: All groups directly in this group, filtering out HedTag children.

        """
        return [group for group in self.children if isinstance(group, HedGroup)]

    def get_first_group(self) -> HedGroup:
        """ Return the first group in this HED string or group.

            Useful for things like Def-expand where they only have a single group.

            Raises a ValueError if there are no groups.

        Returns:
            HedGroup: The first group.

        """
        return self.groups()[0]

    def get_original_hed_string(self) -> str:
        """ Get the original HED string.

        Returns:
            str: The original string with no modification.

        """
        return self._hed_string[self._startpos:self._endpos]

    @property
    def span(self):
        """ Return the source span.

        Return:
            int: start index of the group (including parentheses) from the source string.
            int: end index of the group (including parentheses) from the source string.

        """
        return self._startpos, self._endpos

    def __str__(self) -> str:
        """ Convert this HedGroup to a string.

        Returns:
            str: The group as a string, including any modified HedTags.

        """
        if self.is_group:
            return "(" + ",".join([str(child) for child in self.children]) + ")"
        return ",".join([str(child) for child in self.children])

    def get_as_short(self) -> str:
        """ Return this HedGroup as a short tag string.

        Returns:
            str: The group as a string with all tags as short tags.

        """
        return self.get_as_form("short_tag")

    def get_as_long(self) -> str:
        """ Return this HedGroup as a long tag string.

        Returns:
            str: The group as a string with all tags as long tags.

        """
        return self.get_as_form("long_tag")

    def get_as_form(self, tag_attribute) -> str:
        """ Get the string corresponding to the specified form.

        Parameters:
            tag_attribute (str): The hed_tag property to use to construct the string (usually short_tag or long_tag).

        Returns:
            str: The constructed string after transformation.
        """
        result = ",".join([child.__getattribute__(tag_attribute) if isinstance(child, HedTag) else
                           child.get_as_form(tag_attribute) for child in self.children])
        if self.is_group:
            return f"({result})"
        return result

    def lower(self):
        """ Convenience function, equivalent to str(self).lower(). """
        return str(self).lower()

    def casefold(self):
        """ Convenience function, equivalent to str(self).casefold(). """
        return str(self).casefold()

    def get_as_indented(self, tag_attribute="short_tag"):
        """Return the string as a multiline indented format.

        Parameters:
            tag_attribute (str): The hed_tag property to use to construct the string (usually short_tag or long_tag).

        Returns:
            formatted_hed (str): The indented string.
        """
        hed_string = self.sorted().get_as_form(tag_attribute)

        level_open = []
        level = 0
        indented = ""
        prev = ''
        for c in hed_string:
            if c == "(":
                level_open.append(level)
                indented += "\n" + "\t" * level + c
                level += 1
            elif c == ")":
                level = level_open.pop()
                if prev == ")":
                    indented += "\n" + "\t" * level + c
                else:
                    indented += c

            else:
                indented += c
            prev = c

        return indented

    def find_placeholder_tag(self) -> Union[HedTag, None]:
        """ Return a placeholder tag, if present in this group.

        Returns:
            Union[HedTag, None]: The placeholder tag if found.

        Notes:
            - Assumes a valid HedString with no erroneous "#" characters.
        """
        for tag in self.get_all_tags():
            if tag.is_placeholder():
                return tag

        return None

    def __bool__(self):
        return bool(self.children)

    def __eq__(self, other):
        """ Test whether other is equal to this object.

            Note: This does not account for sorting.  Objects must be in the same order to match.
        """
        if self is other:
            return True

        # Allow us to compare to a list of groups.
        # Note this comparison will NOT check if the list has the outer parenthesis
        if isinstance(other, list):
            return self.children == other
        if isinstance(other, str):
            return str(self) == other
        if not isinstance(other, HedGroup) or self.children != other.children or self.is_group != other.is_group:
            return False
        return True

    def find_tags(self, search_tags, recursive=False, include_groups=2) -> list:
        """ Find the base tags and their containing groups.
        This searches by short_base_tag, ignoring any ancestors or extensions/values.

        Parameters:
            search_tags (container):  A container of short_base_tags to locate.
            recursive (bool): If true, also check subgroups.
            include_groups (0, 1 or 2): Specify return values.
                If 0: return a list of the HedTags.
                If 1: return a list of the HedGroups containing the HedTags.
                If 2: return a list of tuples (HedTag, HedGroup) for the found tags.

        Returns:
            list: The contents of the list depends on the value of include_groups.
        """
        found_tags = []
        if recursive:
            tags = self.get_all_tags()
        else:
            tags = self.tags()
        search_tags = {tag.casefold() for tag in search_tags}
        for tag in tags:
            if tag.short_base_tag.casefold() in search_tags:
                found_tags.append((tag, tag._parent))

        if include_groups == 0 or include_groups == 1:
            return [tag[include_groups] for tag in found_tags]
        return found_tags

    def find_wildcard_tags(self, search_tags, recursive=False, include_groups=2) -> list:
        """ Find the tags and their containing groups.

            This searches tag.short_tag.casefold(), with an implicit wildcard on the end.

            e.g. "Eve" will find Event, but not Sensory-event.

        Parameters:
            search_tags (container): A container of the starts of short tags to search.
            recursive (bool): If True, also check subgroups.
            include_groups (0, 1 or 2): Specify return values.
                If 0: return a list of the HedTags.
                If 1: return a list of the HedGroups containing the HedTags.
                If 2: return a list of tuples (HedTag, HedGroup) for the found tags.

        Returns:
            list: The contents of the list depends on the value of include_groups.
        """
        found_tags = []
        if recursive:
            tags = self.get_all_tags()
        else:
            tags = self.tags()

        search_tags = {search_tag.casefold() for search_tag in search_tags}

        for tag in tags:
            for search_tag in search_tags:
                if tag.short_tag.casefold().startswith(search_tag):
                    found_tags.append((tag, tag._parent))
                    # We can't find the same tag twice
                    break

        if include_groups == 0 or include_groups == 1:
            return [tag[include_groups] for tag in found_tags]
        return found_tags

    def find_exact_tags(self, exact_tags, recursive=False, include_groups=1) -> list:
        """  Find the given tags.  This will only find complete matches, any extension or value must also match.

        Parameters:
            exact_tags (list of HedTag): A container of tags to locate.
            recursive (bool): If true, also check subgroups.
            include_groups (bool): 0, 1 or 2.
                If 0: Return only tags
                If 1: Return only groups
                If 2 or any other value: Return both
        Returns:
            list: A list of tuples. The contents depend on the values of the include_group.
        """
        found_tags = []
        if recursive:
            tags = self.get_all_tags()
        else:
            tags = self.tags()

        for tag in tags:
            if tag in exact_tags:
                found_tags.append((tag, tag._parent))

        if include_groups == 0 or include_groups == 1:
            return [tag[include_groups] for tag in found_tags]
        return found_tags

    def find_def_tags(self, recursive=False, include_groups=3) -> list:
        """ Find def and def-expand tags.

        Parameters:
            recursive (bool): If true, also check subgroups.
            include_groups (int, 0, 1, 2, 3): Options for return values.
                If 0: Return only def and def expand tags/.
                If 1: Return only def tags and def-expand groups.
                If 2: Return only groups containing defs, or def-expand groups.
                If 3 or any other value: Return all 3 as a tuple.
        Returns:
            list: A list of tuples. The contents depend on the values of the include_group.
        """
        if recursive:
            groups = self.get_all_groups()
            def_tags = []
            for group in groups:
                def_tags += self._get_def_tags_from_group(group)
        else:
            def_tags = self._get_def_tags_from_group(self)

        if include_groups == 0 or include_groups == 1 or include_groups == 2:
            return [tag[include_groups] for tag in def_tags]
        return def_tags

    @staticmethod
    def _get_def_tags_from_group(group):
        def_tags = []
        for child in group.children:
            if isinstance(child, HedTag):
                if child.short_base_tag == DefTagNames.DEF_KEY:
                    def_tags.append((child, child, group))
            else:
                for tag in child.tags():
                    if tag.short_base_tag == DefTagNames.DEF_EXPAND_KEY:
                        def_tags.append((tag, child, group))
        return def_tags

    def find_tags_with_term(self, term, recursive=False, include_groups=2) -> list:
        """  Find any tags that contain the given term.

            Note: This can only find identified tags.

        Parameters:
            term (str): A single term to search for.
            recursive (bool): If true, recursively check subgroups.
            include_groups (0, 1 or 2): Controls return values
                If 0: Return only tags.
                If 1: Return only groups.
                If 2 or any other value: Return both.

        Returns:
            list:
        """
        found_tags = []
        if recursive:
            tags = self.get_all_tags()
        else:
            tags = self.tags()

        search_for = term.casefold()
        for tag in tags:
            if search_for in tag.tag_terms:
                found_tags.append((tag, tag._parent))

        if include_groups == 0 or include_groups == 1:
            return [tag[include_groups] for tag in found_tags]
        return found_tags

is_group property

is_group

True if this is a parenthesized group.

span property

span

Return the source span.

Return

int: start index of the group (including parentheses) from the source string. int: end index of the group (including parentheses) from the source string.

append

append(tag_or_group)

Add a tag or group to this group.

Parameters:

Name Type Description Default
tag_or_group HedTag or HedGroup

The new object to add to this group.

required
Source code in hed/models/hed_group.py
def append(self, tag_or_group):
    """ Append a tag or group to the end of this group's children.

    Parameters:
        tag_or_group (HedTag or HedGroup): The new object to add; its parent is set to this group.
    """
    self.children.append(tag_or_group)
    tag_or_group._parent = self

casefold

casefold()

Convenience function, equivalent to str(self).casefold().

Source code in hed/models/hed_group.py
def casefold(self):
    """ Return the string form of this group casefolded (same as str(self).casefold()). """
    text = str(self)
    return text.casefold()

check_if_in_original

check_if_in_original(tag_or_group) -> bool

Check if the tag or group is in the original string.

Parameters:

Name Type Description Default
tag_or_group HedTag or HedGroup

The HedTag or HedGroup to be looked for in this group.

required

Returns:

Name Type Description
bool bool

True if in this group.

Source code in hed/models/hed_group.py
def check_if_in_original(self, tag_or_group) -> bool:
    """ Check whether a tag or group was part of this group before any replace/remove edits.

    The search walks the original children of this group and of every nested group,
    and matches by identity.  This group itself is included in the search.

    Parameters:
        tag_or_group (HedTag or HedGroup): The HedTag or HedGroup to be looked for in this group.

    Returns:
        bool:  True if the object is this group or one of its original descendants.
    """
    visited = []
    pending = [self]

    # Explicit stack; children are pushed reversed so nodes are visited in document order.
    while pending:
        node = pending.pop()
        if isinstance(node, HedGroup):
            pending.extend(reversed(node._original_children))
        visited.append(node)

    return self._check_in_group(tag_or_group, visited)

copy

copy() -> 'HedGroup'

Return a deep copy of this group.

Returns:

Name Type Description
HedGroup 'HedGroup'

The copied group.

Source code in hed/models/hed_group.py
def copy(self) -> "HedGroup":
    """ Return a deep copy of this group.

    The parent link is detached while copying so that the enclosing tree is not copied too;
    the returned copy therefore has no parent.

    Returns:
        HedGroup: The copied group.

    """
    save_parent = self._parent
    self._parent = None
    try:
        return copy.deepcopy(self)
    finally:
        # Restore the parent link even if deepcopy raises, so this group stays attached to its tree.
        self._parent = save_parent

find_def_tags

find_def_tags(recursive=False, include_groups=3) -> list

Find def and def-expand tags.

Parameters:

Name Type Description Default
recursive bool

If true, also check subgroups.

False
include_groups (int, 0, 1, 2, 3)

Options for return values. If 0: Return only def and def-expand tags. If 1: Return only def tags and def-expand groups. If 2: Return only groups containing defs, or def-expand groups. If 3 or any other value: Return all 3 as a tuple.

3

Returns: list: A list of tuples. The contents depend on the values of the include_group.

Source code in hed/models/hed_group.py
def find_def_tags(self, recursive=False, include_groups=3) -> list:
    """ Locate def tags and def-expand groups.

    Parameters:
        recursive (bool): If true, also check subgroups.
        include_groups (int, 0, 1, 2, 3): Options for return values.
            If 0: Return only def and def expand tags.
            If 1: Return only def tags and def-expand groups.
            If 2: Return only groups containing defs, or def-expand groups.
            If 3 or any other value: Return all 3 as a tuple.
    Returns:
        list: A list of single items or of 3-tuples, depending on include_groups.
    """
    searched_groups = self.get_all_groups() if recursive else [self]

    def_tags = []
    for group in searched_groups:
        def_tags += self._get_def_tags_from_group(group)

    if include_groups in (0, 1, 2):
        return [entry[include_groups] for entry in def_tags]
    return def_tags

find_exact_tags

find_exact_tags(
    exact_tags, recursive=False, include_groups=1
) -> list

Find the given tags. This will only find complete matches, any extension or value must also match.

Parameters:

Name Type Description Default
exact_tags list of HedTag

A container of tags to locate.

required
recursive bool

If true, also check subgroups.

False
include_groups bool

0, 1 or 2. If 0: Return only tags. If 1: Return only groups. If 2 or any other value: Return both.

1

Returns: list: A list of tuples. The contents depend on the values of the include_group.

Source code in hed/models/hed_group.py
def find_exact_tags(self, exact_tags, recursive=False, include_groups=1) -> list:
    """  Locate the given tags.  Only complete matches are found: any extension or value must also match.

    Parameters:
        exact_tags (list of HedTag): A container of tags to locate (membership is tested with `in`).
        recursive (bool): If true, also check subgroups.
        include_groups (int): 0, 1 or 2.
            If 0: Return only tags
            If 1: Return only groups
            If 2 or any other value: Return both
    Returns:
        list: Tags, containing groups, or (tag, group) tuples, depending on include_groups.
    """
    candidates = self.get_all_tags() if recursive else self.tags()
    matches = [(tag, tag._parent) for tag in candidates if tag in exact_tags]

    if include_groups in (0, 1):
        return [match[include_groups] for match in matches]
    return matches

find_placeholder_tag

find_placeholder_tag() -> Union[HedTag, None]

Return a placeholder tag, if present in this group.

Returns:

Type Description
Union[HedTag, None]

Union[HedTag, None]: The placeholder tag if found.

Notes
  • Assumes a valid HedString with no erroneous "#" characters.
Source code in hed/models/hed_group.py
def find_placeholder_tag(self) -> Union[HedTag, None]:
    """ Return the first placeholder tag found in this group or its descendants.

    Returns:
        Union[HedTag, None]: The first tag whose is_placeholder() is truthy, or None if there is none.

    Notes:
        - Assumes a valid HedString with no erroneous "#" characters.
    """
    # The generator is consumed lazily, so the search stops at the first hit.
    return next((candidate for candidate in self.get_all_tags() if candidate.is_placeholder()), None)

find_tags

find_tags(
    search_tags, recursive=False, include_groups=2
) -> list

Find the base tags and their containing groups. This searches by short_base_tag, ignoring any ancestors or extensions/values.

Parameters:

Name Type Description Default
search_tags container

A container of short_base_tags to locate.

required
recursive bool

If true, also check subgroups.

False
include_groups (0, 1 or 2)

Specify return values. If 0: return a list of the HedTags. If 1: return a list of the HedGroups containing the HedTags. If 2: return a list of tuples (HedTag, HedGroup) for the found tags.

2

Returns:

Name Type Description
list list

The contents of the list depends on the value of include_groups.

Source code in hed/models/hed_group.py
def find_tags(self, search_tags, recursive=False, include_groups=2) -> list:
    """ Find tags by short base tag, together with their containing groups.

    The comparison is case-insensitive and uses each tag's short_base_tag only.

    Parameters:
        search_tags (container): A container of short_base_tags to locate.
        recursive (bool): If True, search all descendant tags; otherwise only the direct child tags.
        include_groups (0, 1 or 2): Specify return values.
            If 0: return a list of the HedTags.
            If 1: return a list of the HedGroups containing the HedTags.
            If 2 (or any other value): return a list of tuples (HedTag, HedGroup) for the found tags.

    Returns:
        list: The contents of the list depend on the value of include_groups.
    """
    pool = self.get_all_tags() if recursive else self.tags()
    wanted = {name.casefold() for name in search_tags}
    hits = [(candidate, candidate._parent) for candidate in pool
            if candidate.short_base_tag.casefold() in wanted]

    if include_groups == 0 or include_groups == 1:
        return [hit[include_groups] for hit in hits]
    return hits

find_tags_with_term

find_tags_with_term(
    term, recursive=False, include_groups=2
) -> list

Find any tags that contain the given term.

Note: This can only find identified tags.

Parameters:

Name Type Description Default
term str

A single term to search for.

required
recursive bool

If true, recursively check subgroups.

False
include_groups (0, 1 or 2)

Controls return values If 0: Return only tags. If 1: Return only groups. If 2 or any other value: Return both.

2

Returns:

Name Type Description
list list
Source code in hed/models/hed_group.py
def find_tags_with_term(self, term, recursive=False, include_groups=2) -> list:
    """ Find any tags whose tag_terms contain the given term.

        Note: This can only find identified tags.

    Parameters:
        term (str): A single term to search for (casefolded before the lookup).
        recursive (bool): If True, search all descendant tags; otherwise only the direct child tags.
        include_groups (0, 1 or 2): Controls return values.
            If 0: Return only tags.
            If 1: Return only groups.
            If 2 or any other value: Return (tag, group) tuples.

    Returns:
        list: Tags, groups, or (tag, group) tuples, depending on include_groups.
    """
    pool = self.get_all_tags() if recursive else self.tags()
    needle = term.casefold()
    hits = [(candidate, candidate._parent) for candidate in pool if needle in candidate.tag_terms]

    if include_groups == 0 or include_groups == 1:
        return [hit[include_groups] for hit in hits]
    return hits

find_wildcard_tags

find_wildcard_tags(
    search_tags, recursive=False, include_groups=2
) -> list

Find the tags and their containing groups.

This searches tag.short_tag.casefold(), with an implicit wildcard on the end.

e.g. "Eve" will find Event, but not Sensory-event.

Parameters:

Name Type Description Default
search_tags container

A container of the starts of short tags to search.

required
recursive bool

If True, also check subgroups.

False
include_groups (0, 1 or 2)

Specify return values. If 0: return a list of the HedTags. If 1: return a list of the HedGroups containing the HedTags. If 2: return a list of tuples (HedTag, HedGroup) for the found tags.

2

Returns:

Name Type Description
list list

The contents of the list depends on the value of include_groups.

Source code in hed/models/hed_group.py
def find_wildcard_tags(self, search_tags, recursive=False, include_groups=2) -> list:
    """ Find tags whose short form starts with one of the given prefixes.

        This searches tag.short_tag.casefold(), with an implicit wildcard on the end.

        e.g. "Eve" will find Event, but not Sensory-event.

    Parameters:
        search_tags (container): A container of the starts of short tags to search.
        recursive (bool): If True, search all descendant tags; otherwise only the direct child tags.
        include_groups (0, 1 or 2): Specify return values.
            If 0: return a list of the HedTags.
            If 1: return a list of the HedGroups containing the HedTags.
            If 2 (or any other value): return a list of tuples (HedTag, HedGroup) for the found tags.

    Returns:
        list: The contents of the list depend on the value of include_groups.
    """
    pool = self.get_all_tags() if recursive else self.tags()
    prefixes = {prefix.casefold() for prefix in search_tags}

    # any() stops at the first matching prefix, so each tag is reported at most once.
    hits = [(candidate, candidate._parent) for candidate in pool
            if any(candidate.short_tag.casefold().startswith(prefix) for prefix in prefixes)]

    if include_groups == 0 or include_groups == 1:
        return [hit[include_groups] for hit in hits]
    return hits

get_all_groups

get_all_groups(also_return_depth=False) -> list

Return HedGroups, including descendants and self.

Parameters:

Name Type Description Default
also_return_depth bool

If True, return tuples (group, depth) rather than just groups.

False

Returns:

Name Type Description
list list

The list of all HedGroups in this group, including descendants and self.

Source code in hed/models/hed_group.py
def get_all_groups(self, also_return_depth=False) -> list:
    """ Return HedGroups, including descendants and self.

    Groups are listed in depth-first pre-order: a group appears before its subgroups, and
    siblings keep the order they have in children.

    Parameters:
        also_return_depth (bool): If True, return tuples (group, depth) rather than just groups.
                                  The second element is the value returned by
                                  self._check_in_group(group, top_groups).

    Returns:
        list: The list of all HedGroups in this group, including descendants and self.

    """
    # Explicit stack instead of list.pop(0) plus list concatenation: both of those are O(n) per
    # node. Children are pushed reversed so they are popped in their original order.
    pending = [self]
    final_list = []

    while pending:
        current_group_or_tag = pending.pop()
        if isinstance(current_group_or_tag, HedGroup):
            final_list.append(current_group_or_tag)
            pending.extend(reversed(list(current_group_or_tag.children)))

    if also_return_depth:
        top_groups = self.groups()

        final_list = [(group, self._check_in_group(group, top_groups)) for group in final_list]
    return final_list

get_all_tags

get_all_tags() -> list

Return HedTags, including descendants.

Returns:

Name Type Description
list list

A list of all the tags in this group including descendants.

Source code in hed/models/hed_group.py
def get_all_tags(self) -> list:
    """ Return HedTags, including descendants.

    Tags are listed in depth-first document order: anything that is not a HedGroup is treated
    as a tag and collected in the order it is reached.

    Returns:
        list:  A list of all the tags in this group including descendants.

    """
    # Explicit stack instead of list.pop(0) plus list concatenation: both of those are O(n) per
    # node. Children are pushed reversed so they are popped in their original order.
    pending = [self]
    final_list = []

    while pending:
        current_group_or_tag = pending.pop()
        if isinstance(current_group_or_tag, HedGroup):
            pending.extend(reversed(list(current_group_or_tag.children)))
        else:
            final_list.append(current_group_or_tag)
    return final_list

get_as_form

get_as_form(tag_attribute) -> str

Get the string corresponding to the specified form.

Parameters:

Name Type Description Default
tag_attribute str

The hed_tag property to use to construct the string (usually short_tag or long_tag).

required

Returns:

Name Type Description
str str

The constructed string after transformation.

Source code in hed/models/hed_group.py
def get_as_form(self, tag_attribute) -> str:
    """ Build the string for this group using the given tag property.

    Parameters:
        tag_attribute (str): The hed_tag property to use to construct the string (usually short_tag or long_tag).

    Returns:
        str: The children joined by commas, wrapped in parentheses when self.is_group is truthy.
    """
    pieces = []
    for child in self.children:
        if isinstance(child, HedTag):
            # Tags contribute the requested attribute directly.
            pieces.append(child.__getattribute__(tag_attribute))
        else:
            # Anything else is rendered recursively.
            pieces.append(child.get_as_form(tag_attribute))

    joined = ",".join(pieces)
    return f"({joined})" if self.is_group else joined

get_as_indented

get_as_indented(tag_attribute='short_tag')

Return the string as a multiline indented format.

Parameters:

Name Type Description Default
tag_attribute str

The hed_tag property to use to construct the string (usually short_tag or long_tag).

'short_tag'

Returns:

Name Type Description
formatted_hed str

The indented string.

Source code in hed/models/hed_group.py
def get_as_indented(self, tag_attribute="short_tag"):
    """Return the sorted string in a multiline, tab-indented layout.

    Every opening parenthesis starts a new line indented by its nesting level. A closing
    parenthesis that directly follows another closing parenthesis is also moved to its own line.

    Parameters:
        tag_attribute (str): The hed_tag property to use to construct the string (usually short_tag or long_tag).

    Returns:
        formatted_hed (str): The indented string.
    """
    flat = self.sorted().get_as_form(tag_attribute)

    open_levels = []   # indentation level at which each currently open "(" was written
    depth = 0
    pieces = []
    previous = ''
    for char in flat:
        if char == "(":
            open_levels.append(depth)
            pieces.append("\n" + "\t" * depth + char)
            depth += 1
        elif char == ")":
            depth = open_levels.pop()
            if previous == ")":
                pieces.append("\n" + "\t" * depth + char)
            else:
                pieces.append(char)
        else:
            pieces.append(char)
        previous = char

    return "".join(pieces)

get_as_long

get_as_long() -> str

Return this HedGroup as a long tag string.

Returns:

Name Type Description
str str

The group as a string with all tags as long tags.

Source code in hed/models/hed_group.py
def get_as_long(self) -> str:
    """ Return this HedGroup rendered with the long_tag form of every tag.

    Returns:
        str: The result of get_as_form("long_tag").

    """
    long_form = self.get_as_form("long_tag")
    return long_form

get_as_short

get_as_short() -> str

Return this HedGroup as a short tag string.

Returns:

Name Type Description
str str

The group as a string with all tags as short tags.

Source code in hed/models/hed_group.py
def get_as_short(self) -> str:
    """ Return this HedGroup rendered with the short_tag form of every tag.

    Returns:
        str: The result of get_as_form("short_tag").

    """
    short_form = self.get_as_form("short_tag")
    return short_form

get_first_group

get_first_group() -> HedGroup

Return the first group in this HED string or group.

Useful for things like Def-expand where they only have a single group.

Raises an IndexError if there are no groups.

Returns:

Name Type Description
HedGroup HedGroup

The first group.

Source code in hed/models/hed_group.py
def get_first_group(self) -> HedGroup:
    """ Return the first direct child group of this HED string or group.

        Useful for things like Def-expand where they only have a single group.

        Raises an IndexError if there are no groups (the result of groups() is indexed at 0).

    Returns:
        HedGroup: The first group.

    """
    child_groups = self.groups()
    return child_groups[0]

get_original_hed_string

get_original_hed_string() -> str

Get the original HED string.

Returns:

Name Type Description
str str

The original string with no modification.

Source code in hed/models/hed_group.py
def get_original_hed_string(self) -> str:
    """ Get the portion of the source string this object was created from.

    Returns:
        str: The slice of the stored string between _startpos and _endpos, unmodified.

    """
    source_text = self._hed_string
    return source_text[self._startpos:self._endpos]

groups

groups() -> list

Return the direct child groups of this group.

Returns:

Name Type Description
list list

All groups directly in this group, filtering out HedTag children.

Source code in hed/models/hed_group.py
def groups(self) -> list:
    """ Return the direct child groups of this group.

    Returns:
        list: The children that are HedGroup instances, in their original order.

    """
    child_groups = []
    for child in self.children:
        if isinstance(child, HedGroup):
            child_groups.append(child)
    return child_groups

lower

lower()

Convenience function, equivalent to str(self).lower().

Source code in hed/models/hed_group.py
def lower(self):
    """ Return the lower-cased string form of this object, i.e. str(self).lower(). """
    as_text = str(self)
    return as_text.lower()

remove

remove(
    items_to_remove: Iterable[Union[HedTag, "HedGroup"]],
)

Remove any tags/groups in items_to_remove.

Parameters:

Name Type Description Default
items_to_remove list

List of HedGroups and/or HedTags to remove by identity.

required
Notes
  • Any groups that become empty will also be pruned.
  • If you pass a child and parent group, the child will also be removed from the parent.
Source code in hed/models/hed_group.py
def remove(self, items_to_remove: Iterable[Union[HedTag, 'HedGroup']]):
    """ Remove any tags/groups in items_to_remove.

    Parameters:
        items_to_remove (list):  List of HedGroups and/or HedTags to remove by identity.

    Notes:
        - Any groups that become empty will also be pruned.
        - If you pass a child and parent group, the child will also be removed from the parent.
        - Each item is located through its _parent attribute, and that attribute is cleared afterwards.

    :raises ValueError:
        - An item is not among the children of its recorded parent.
    """
    # Filter out duplicates (keyed by identity, not equality).
    unique_items = {id(item): item for item in items_to_remove}
    empty_groups = []

    for item in unique_items.values():
        group = item._parent
        # Keep a snapshot of the untouched child list the first time a group is modified.
        if group._original_children is group.children:
            group._original_children = group.children.copy()

        # list.remove() compares with ==, which could drop an equal but different sibling.
        # Search by identity so exactly the requested object is removed.
        for index, child in enumerate(group.children):
            if child is item:
                del group.children[index]
                break
        else:
            raise ValueError("remove: item is not a child of its parent group")

        # Prune emptied groups, except ones the caller already asked to remove: those are
        # detached by this same loop and must not be removed a second time.
        if not group.children and group is not self and id(group) not in unique_items:
            empty_groups.append(group)

    if empty_groups:
        self.remove(empty_groups)

    # Do this last: parents are still needed while items are being detached above.
    for item in unique_items.values():
        item._parent = None

replace staticmethod

replace(item_to_replace, new_contents)

Replace an existing tag or group.

Note: This is a static method that relies on the parent attribute of item_to_replace.

Parameters:

Name Type Description Default
item_to_replace HedTag or HedGroup

The item to replace must exist or this will raise an error.

required
new_contents HedTag or HedGroup

Replacement contents.

required

:raises KeyError: - item_to_replace does not exist.

:raises AttributeError: - item_to_replace has no parent set.

Source code in hed/models/hed_group.py
@staticmethod
def replace(item_to_replace, new_contents):
    """ Replace an existing tag or group.

        Note: This is a static method that relies on the parent attribute of item_to_replace.

    Parameters:
        item_to_replace (HedTag or HedGroup): The item to replace must exist or this will raise an error.
        new_contents (HedTag or HedGroup): Replacement contents.

    :raises KeyError:
        - item_to_replace does not exist.

    :raises AttributeError:
        - item_to_replace has no parent set.
    """
    parent = item_to_replace._parent
    parent._replace(item_to_replace=item_to_replace, new_contents=new_contents)

sort

sort()

Sort the tags and groups in this HedString in a consistent order.

Source code in hed/models/hed_group.py
def sort(self):
    """ Sort the tags and groups of this object in place, in a consistent order.

    Delegates to _sorted(update_self=True); nothing is returned.
    """
    self._sorted(update_self=True)
    return None

sorted

sorted() -> 'HedGroup'

Return a sorted copy of this HED group

Returns:

Name Type Description
sorted_copy HedGroup

The sorted copy.

Source code in hed/models/hed_group.py
def sorted(self) -> "HedGroup":
    """ Return a sorted copy of this HED group, leaving this object as it is.

    Returns:
        sorted_copy (HedGroup): The copy, after _sorted(update_self=True) has been applied to it.
    """
    duplicate = self.copy()
    duplicate._sorted(update_self=True)
    return duplicate

tags

tags() -> list

Return the direct child tags of this group.

Returns:

Name Type Description
list list

All tags directly in this group, filtering out HedGroup children.

Source code in hed/models/hed_group.py
def tags(self) -> list:
    """ Return the direct child tags of this group.

    Returns:
        list: The children that are HedTag instances, in their original order.

    """
    child_tags = []
    for child in self.children:
        if isinstance(child, HedTag):
            child_tags.append(child)
    return child_tags

Sidecar

Sidecar

Contents of a JSON file or JSON files.

Source code in hed/models/sidecar.py
class Sidecar:
    """ Contents of a JSON file or JSON files.

    The parsed JSON is kept in loaded_dict (one top-level key per column). Definitions are
    extracted lazily, the first time get_def_dict is called with a schema.
    """

    def __init__(self, files, name=None):
        """ Construct a Sidecar object representing a JSON file.

        Parameters:
            files (str or FileLike or list): A string or file-like object representing a JSON file, or a list of such.
            name (str or None): Optional name identifying this sidecar, generally a filename.
        """
        # name must be set before loading: load_sidecar_file fills it in from the first filename if empty.
        self.name = name
        self.loaded_dict = self.load_sidecar_files(files)
        self._def_dict = None
        self._extract_definition_issues = []

    def __iter__(self):
        """ An iterator to go over the individual column metadata.

        Returns:
            iterator: An iterator over the column metadata values.

        """
        return iter(self.column_data.values())

    def __getitem__(self, column_name):
        """ Return the metadata for one column.

        Parameters:
            column_name (str): The top-level key of the column in the loaded JSON.

        Returns:
            ColumnMetadata or None: The column's metadata, or None if the column is not in this sidecar.
        """
        if column_name not in self.loaded_dict:
            return None
        # Pass the loaded data as the source, exactly as column_data does; without it the
        # returned metadata would not be backed by this sidecar's contents.
        return ColumnMetadata(name=column_name, source=self.loaded_dict)

    @property
    def all_hed_columns(self):
        """ Return all columns that are HED compatible.

        Returns:
            column_refs(list): The names of all columns whose type is not ColumnType.Ignore.
        """
        possible_column_references = [column.column_name for column in self if column.column_type != ColumnType.Ignore]

        return possible_column_references

    @property
    def def_dict(self) -> 'DefinitionDict':
        """ Definitions from this sidecar.

            Generally you should instead call get_def_dict to get the relevant definitions.

        Returns:
            DefinitionDict: The definitions for this sidecar (None until they have been extracted).
        """
        return self._def_dict

    @property
    def column_data(self):
        """ Generate the ColumnMetadata for this sidecar.

        Returns:
            dict({str:ColumnMetadata}): The column metadata defined by this sidecar.
        """
        return {col_name: ColumnMetadata(name=col_name, source=self.loaded_dict) for col_name in self.loaded_dict}

    def get_def_dict(self, hed_schema, extra_def_dicts=None) -> 'DefinitionDict':
        """ Return the definition dict for this sidecar.

        Parameters:
            hed_schema (HedSchema): Identifies tags to find definitions.
            extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list.

        Returns:
            DefinitionDict:  A single definition dict representing all the data(and extra def dicts).
        """
        # Extract once and cache; without a schema nothing can be extracted.
        if self._def_dict is None and hed_schema:
            self._def_dict = self.extract_definitions(hed_schema)
        def_dicts = []
        if self.def_dict:
            def_dicts.append(self.def_dict)
        if extra_def_dicts:
            if not isinstance(extra_def_dicts, list):
                extra_def_dicts = [extra_def_dicts]
            def_dicts += extra_def_dicts
        return DefinitionDict(def_dicts)

    def save_as_json(self, save_filename):
        """ Save column metadata to a JSON file.

        Parameters:
            save_filename (str): Path to save file.

        """
        # json.dump escapes non-ASCII characters by default, so the output is plain ASCII.
        with open(save_filename, "w") as fp:
            json.dump(self.loaded_dict, fp, indent=4)

    def get_as_json_string(self) -> str:
        """ Return this sidecar's column metadata as a string.

        Returns:
            str: The json string representing this sidecar.

        """
        return json.dumps(self.loaded_dict, indent=4)

    def load_sidecar_file(self, file):
        """ Load column metadata from a given json file.

        Parameters:
            file (str or FileLike): If a string, this is a filename. Otherwise, it will be parsed as a file-like.

        Returns:
            dict: The parsed JSON, or an empty dict if file is empty/None.

        :raises HedFileError:
            - If the file was not found or could not be parsed into JSON.
        """
        if not file:
            return {}
        elif isinstance(file, str):
            if not self.name:
                self.name = file
            try:
                # JSON interchange text is UTF-8; do not depend on the platform's default encoding.
                with open(file, "r", encoding="utf-8") as fp:
                    return self._load_json_file(fp)
            except OSError as e:
                raise HedFileError(HedExceptions.FILE_NOT_FOUND, e.strerror, file) from e
        else:
            return self._load_json_file(file)

    def load_sidecar_files(self, files):
        """ Load json from a given file or list.

        Parameters:
            files (str or FileLike or list): A string or file-like object representing a JSON file, or a list of such.

        Returns:
            dict: The merged contents; for duplicate top-level keys, later files win.

        :raises HedFileError:
            - If the file was not found or could not be parsed into JSON.

        """
        if not files:
            return {}
        if not isinstance(files, list):
            files = [files]

        merged_dict = {}
        for file in files:
            loaded_json = self.load_sidecar_file(file)
            merged_dict.update(loaded_json)
        return merged_dict

    def validate(self, hed_schema, extra_def_dicts=None, name=None, error_handler=None) -> list[dict]:
        """Create a SidecarValidator and validate this sidecar with the schema.

        Parameters:
            hed_schema (HedSchema): Input data to be validated.
            extra_def_dicts (list or DefinitionDict): Extra def dicts in addition to sidecar.
            name (str): The name to report this sidecar as.
            error_handler (ErrorHandler): Error context to use.  Creates a new one if None.

        Returns:
            list[dict]: A list of issues associated with each level in the HED string.
        """
        # Imported here rather than at module level (presumably to avoid a circular import).
        from hed.validator.sidecar_validator import SidecarValidator

        if error_handler is None:
            error_handler = ErrorHandler()

        validator = SidecarValidator(hed_schema)
        issues = validator.validate(self, extra_def_dicts, name, error_handler=error_handler)
        return issues

    def _load_json_file(self, fp):
        """ Load the raw json of a given file.

        Parameters:
            fp (File-like): The JSON source stream.

        Returns:
            The parsed JSON content.

        :raises HedFileError:
            - If the file cannot be parsed.
        """
        try:
            return json.load(fp)
        except (json.decoder.JSONDecodeError, AttributeError) as e:
            # AttributeError covers objects that are not file-like (no read method).
            raise HedFileError(HedExceptions.CANNOT_PARSE_JSON, str(e), self.name) from e

    def extract_definitions(self, hed_schema, error_handler=None) -> 'DefinitionDict':
        """ Gather and validate definitions in metadata.

        Issues found along the way are stored in self._extract_definition_issues.

        Parameters:
            hed_schema (HedSchema): The schema to use to identify tags.
            error_handler (ErrorHandler or None): The error handler to use for context, uses a default one if None.

        Returns:
            DefinitionDict: Contains all the definitions located in the sidecar.

        """
        if error_handler is None:
            error_handler = ErrorHandler()
        def_dict = DefinitionDict()

        self._extract_definition_issues = []
        if hed_schema:
            for column_data in self:
                error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_data.column_name)
                hed_strings = column_data.get_hed_strings()
                for key_name, hed_string in hed_strings.items():
                    hed_string_obj = HedString(hed_string, hed_schema)
                    # The key context is only pushed when the column has more than one HED string.
                    if len(hed_strings) > 1:
                        error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name)
                    error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj)
                    self._extract_definition_issues += def_dict.check_for_definitions(hed_string_obj, error_handler)
                    error_handler.pop_error_context()
                    if len(hed_strings) > 1:
                        error_handler.pop_error_context()

                error_handler.pop_error_context()

        return def_dict

    def get_column_refs(self):
        """ Returns a list of column refs found in this sidecar.

            This does not validate the references.

        Returns:
            column_refs(list): A list of unique column refs found.
        """
        found_vals = set()
        for column_data in self:
            if column_data.column_type == ColumnType.Ignore:
                continue
            hed_strings = column_data.get_hed_strings()
            # Column refs are names in curly braces, e.g. {column_name}.
            matches = hed_strings.str.findall(r"\{([a-z_\-0-9]+)\}", re.IGNORECASE)
            u_vals = [match for sublist in matches for match in sublist]

            found_vals.update(u_vals)

        return list(found_vals)

all_hed_columns property

all_hed_columns

Return all columns that are HED compatible.

Returns:

Name Type Description
column_refs list

A list of all valid HED columns by name.

column_data property

column_data

Generate the ColumnMetadata for this sidecar.

Returns:

Name Type Description
dict {str: ColumnMetadata}

The column metadata defined by this sidecar.

def_dict property

def_dict: DefinitionDict

Definitions from this sidecar.

Generally you should instead call get_def_dict to get the relevant definitions.

Returns:

Name Type Description
DefinitionDict DefinitionDict

The definitions for this sidecar.

extract_definitions

extract_definitions(
    hed_schema, error_handler=None
) -> DefinitionDict

Gather and validate definitions in metadata.

Parameters:

Name Type Description Default
hed_schema HedSchema

The schema used to identify tags.

required
error_handler ErrorHandler or None

The error handler to use for context, uses a default one if None.

None

Returns:

Name Type Description
DefinitionDict DefinitionDict

Contains all the definitions located in the sidecar.

Source code in hed/models/sidecar.py
def extract_definitions(self, hed_schema, error_handler=None) -> 'DefinitionDict':
    """ Collect the definitions found in this sidecar's HED strings.

    Issues reported while checking are accumulated in self._extract_definition_issues,
    which is reset at the start of every call.

    Parameters:
        hed_schema (HedSchema): The schema used to identify tags.
        error_handler (ErrorHandler or None): The error handler to use for context, uses a default one if None.

    Returns:
        DefinitionDict: Contains all the definitions located in the sidecar (empty if no schema is given).

    """
    if error_handler is None:
        error_handler = ErrorHandler()
    def_dict = DefinitionDict()

    self._extract_definition_issues = []
    if not hed_schema:
        return def_dict

    for column_data in self:
        error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_data.column_name)
        hed_strings = column_data.get_hed_strings()
        for key_name, hed_string in hed_strings.items():
            hed_string_obj = HedString(hed_string, hed_schema)
            # A key-level context is only used when the column holds more than one HED string.
            needs_key_context = len(hed_strings) > 1
            if needs_key_context:
                error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name)
            error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj)
            self._extract_definition_issues += def_dict.check_for_definitions(hed_string_obj, error_handler)
            error_handler.pop_error_context()
            if needs_key_context:
                error_handler.pop_error_context()

        # Matches the column-name context pushed at the top of this iteration.
        error_handler.pop_error_context()

    return def_dict

get_as_json_string

get_as_json_string() -> str

Return this sidecar's column metadata as a string.

Returns:

Name Type Description
str str

The json string representing this sidecar.

Source code in hed/models/sidecar.py
def get_as_json_string(self) -> str:
    """ Serialize this sidecar's loaded dictionary to JSON text.

    Returns:
        str: The json string representing this sidecar, indented by four spaces.

    """
    json_text = json.dumps(self.loaded_dict, indent=4)
    return json_text

get_column_refs

get_column_refs()

Returns a list of column refs found in this sidecar.

This does not validate

Returns:

Name Type Description
column_refs list

A list of unique column refs found.

Source code in hed/models/sidecar.py
def get_column_refs(self):
    """ Collect the column references that appear in this sidecar's HED strings.

        A reference is a name in curly braces made of letters, digits, "_" or "-".
        Columns whose type is ColumnType.Ignore are skipped. Nothing is validated.

    Returns:
        column_refs(list): A list of unique column refs found.
    """
    unique_refs = set()
    for column_data in self:
        if column_data.column_type != ColumnType.Ignore:
            per_string_matches = column_data.get_hed_strings().str.findall(r"\{([a-z_\-0-9]+)\}", re.IGNORECASE)
            # Each entry holds the references found in one HED string.
            for refs_in_string in per_string_matches:
                unique_refs.update(refs_in_string)

    return list(unique_refs)

get_def_dict

get_def_dict(
    hed_schema, extra_def_dicts=None
) -> DefinitionDict

Return the definition dict for this sidecar.

Parameters:

Name Type Description Default
hed_schema HedSchema

Identifies tags to find definitions.

required
extra_def_dicts list, DefinitionDict, or None

Extra dicts to add to the list.

None

Returns:

Name Type Description
DefinitionDict DefinitionDict

A single definition dict representing all the data(and extra def dicts).

Source code in hed/models/sidecar.py
def get_def_dict(self, hed_schema, extra_def_dicts=None) -> 'DefinitionDict':
    """ Return the combined definition dict for this sidecar.

    The sidecar's own definitions are extracted on first use (when a schema is given) and cached.

    Parameters:
        hed_schema (HedSchema): Identifies tags to find definitions.
        extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list.

    Returns:
        DefinitionDict:  A single definition dict representing all the data(and extra def dicts).
    """
    if self._def_dict is None and hed_schema:
        self._def_dict = self.extract_definitions(hed_schema)

    combined = []
    if self.def_dict:
        combined.append(self.def_dict)
    if extra_def_dicts:
        # A single extra dict is accepted as well as a list of them.
        extras = extra_def_dicts if isinstance(extra_def_dicts, list) else [extra_def_dicts]
        combined.extend(extras)
    return DefinitionDict(combined)

load_sidecar_file

load_sidecar_file(file)

Load column metadata from a given json file.

Parameters:

Name Type Description Default
file str or FileLike

If a string, this is a filename. Otherwise, it will be parsed as a file-like.

required

:raises HedFileError: - If the file was not found or could not be parsed into JSON.

Source code in hed/models/sidecar.py
def load_sidecar_file(self, file):
    """ Load column metadata from a given json file.

    Parameters:
        file (str or FileLike): If a string, this is a filename. Otherwise, it will be parsed as a file-like.

    Returns:
        dict: The parsed JSON, or an empty dict if file is empty/None.

    Notes:
        - If this sidecar has no name yet, a filename passed here becomes its name.

    :raises HedFileError:
        - If the file was not found or could not be parsed into JSON.
    """
    if not file:
        return {}
    if not isinstance(file, str):
        return self._load_json_file(file)

    if not self.name:
        self.name = file
    try:
        # JSON interchange text is UTF-8; do not depend on the platform's default encoding.
        with open(file, "r", encoding="utf-8") as fp:
            return self._load_json_file(fp)
    except OSError as e:
        raise HedFileError(HedExceptions.FILE_NOT_FOUND, e.strerror, file) from e

load_sidecar_files

load_sidecar_files(files)

Load json from a given file or list.

Parameters:

Name Type Description Default
files str or FileLike or list

A string or file-like object representing a JSON file, or a list of such.

required

:raises HedFileError: - If the file was not found or could not be parsed into JSON.

Source code in hed/models/sidecar.py
def load_sidecar_files(self, files):
    """ Load and merge the JSON from one or more sidecar sources.

    Parameters:
        files (str or FileLike or list): A filename or file-like object for a JSON file, or a list of such.

    Returns:
        dict: The merged contents. For duplicate top-level keys, later sources overwrite earlier ones.

    :raises HedFileError:
        - If the file was not found or could not be parsed into JSON.

    """
    if not files:
        return {}
    sources = files if isinstance(files, list) else [files]

    combined = {}
    for source in sources:
        combined.update(self.load_sidecar_file(source))
    return combined

save_as_json

save_as_json(save_filename)

Save column metadata to a JSON file.

Parameters:

Name Type Description Default
save_filename str

Path to save file.

required
Source code in hed/models/sidecar.py
def save_as_json(self, save_filename):
    """ Write the loaded sidecar dictionary to a JSON file.

    Parameters:
        save_filename (str): Path of the file to write (opened in "w" mode, so existing content is replaced).

    """
    with open(save_filename, mode="w") as json_file:
        json.dump(self.loaded_dict, json_file, indent=4)

validate

validate(
    hed_schema,
    extra_def_dicts=None,
    name=None,
    error_handler=None,
) -> list[dict]

Create a SidecarValidator and validate this sidecar with the schema.

Parameters:

Name Type Description Default
hed_schema HedSchema

The schema passed to the SidecarValidator for validation.

required
extra_def_dicts list or DefinitionDict

Extra def dicts in addition to sidecar.

None
name str

The name to report this sidecar as.

None
error_handler ErrorHandler

Error context to use. Creates a new one if None.

None

Returns:

Type Description
list[dict]

list[dict]: A list of issues associated with each level in the HED string.

Source code in hed/models/sidecar.py
def validate(self, hed_schema, extra_def_dicts=None, name=None, error_handler=None) -> list[dict]:
    """Validate this sidecar against a schema using a SidecarValidator.

    Parameters:
        hed_schema (HedSchema): The schema handed to the SidecarValidator.
        extra_def_dicts (list or DefinitionDict): Extra def dicts in addition to sidecar.
        name (str): The name to report this sidecar as.
        error_handler (ErrorHandler): Error context to use.  Creates a new one if None.

    Returns:
        list[dict]: The issues returned by the validator.
    """
    # Imported at call time rather than module level (presumably to avoid a circular import -- confirm).
    from hed.validator.sidecar_validator import SidecarValidator

    handler = ErrorHandler() if error_handler is None else error_handler
    return SidecarValidator(hed_schema).validate(self, extra_def_dicts, name, error_handler=handler)

TabularInput

TabularInput

Bases: BaseInput

A BIDS tabular file with sidecar.

Source code in hed/models/tabular_input.py
class TabularInput(BaseInput):
    """ A BIDS tabular file with sidecar. """

    HED_COLUMN_NAME = "HED"

    def __init__(self, file=None, sidecar=None, name=None):
        """ Constructor for the TabularInput class.

        Parameters:
            file (str or FileLike or pd.Dataframe): A tsv file to open.
            sidecar (str or Sidecar or FileLike): A Sidecar or source file/filename.
            name (str): The name to display for this file for error purposes.

        :raises HedFileError:
            - The file is blank.
            - An invalid dataframe was passed with size 0.
            - An invalid extension was provided.
            - A duplicate or empty column name appears.

        :raises OSError:
            - Cannot open the indicated file.

        :raises ValueError:
            - This file has no column names.
        """
        # Anything truthy that is not already a Sidecar is treated as a source for one.
        if sidecar and not isinstance(sidecar, Sidecar):
            sidecar = Sidecar(sidecar)
        new_mapper = ColumnMapper(sidecar=sidecar, optional_tag_columns=[self.HED_COLUMN_NAME],
                                  warn_on_missing_column=True)

        self._sidecar = sidecar

        super().__init__(file, file_type=".tsv", worksheet_name=None, has_column_names=True, mapper=new_mapper,
                         name=name, allow_blank_names=False, )

        if not self._has_column_names:
            raise ValueError("You are attempting to open a bids_old style file with no column headers provided.\n"
                             "This is probably not intended.")

    def reset_column_mapper(self, sidecar=None):
        """ Replace the sidecar and rebuild the column mapper.

        Parameters:
            sidecar (str or FileLike or list or Sidecar or None): A Sidecar, or a source that the Sidecar
                constructor accepts.  A falsy value removes the sidecar.

        """
        # Coerce exactly as __init__ does.  Otherwise a raw filename would be stored in
        # self._sidecar and get_def_dict/get_column_refs would later call Sidecar methods on it.
        if sidecar and not isinstance(sidecar, Sidecar):
            sidecar = Sidecar(sidecar)
        new_mapper = ColumnMapper(sidecar=sidecar, optional_tag_columns=[self.HED_COLUMN_NAME])
        self._sidecar = sidecar

        self.reset_mapper(new_mapper)

    def get_def_dict(self, hed_schema, extra_def_dicts=None) -> 'DefinitionDict':
        """ Return the definition dict for this file.

        Delegates to the sidecar when one is attached, otherwise to the base class.

        Parameters:
            hed_schema (HedSchema): Used to identify tags to find definitions.
            extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list.

        Returns:
            DefinitionDict:   A single definition dict representing all the data(and extra def dicts).
        """
        if self._sidecar:
            return self._sidecar.get_def_dict(hed_schema, extra_def_dicts)
        else:
            return super().get_def_dict(hed_schema, extra_def_dicts)

    def get_column_refs(self) -> list[str]:
        """ Return a list of column refs for this file.

        Returns:
            list[str]: The column refs reported by the sidecar, or an empty list if there is no sidecar.
        """
        if self._sidecar:
            return self._sidecar.get_column_refs()
        return []

    def get_sidecar(self) -> Union[Sidecar, None]:
        """Return the sidecar associated with this TabularInput."""
        return self._sidecar

get_column_refs

get_column_refs() -> list[str]

Return a list of column refs for this file.

Returns the column refs from the attached sidecar, or an empty list if there is no sidecar.

Returns:

Type Description
list[str]

list[str]: A list of unique column refs found.

Source code in hed/models/tabular_input.py
def get_column_refs(self) -> list[str]:
    """ Return the column refs reported by the attached sidecar.

    Returns:
        list[str]: The sidecar's column refs, or an empty list when there is no sidecar.
    """
    sidecar = self._sidecar
    return sidecar.get_column_refs() if sidecar else []

get_def_dict

get_def_dict(
    hed_schema, extra_def_dicts=None
) -> "DefinitionDict"

Return the definition dict for this sidecar.

Parameters:

Name Type Description Default
hed_schema HedSchema

Used to identify tags to find definitions.

required
extra_def_dicts list, DefinitionDict, or None

Extra dicts to add to the list.

None

Returns:

Name Type Description
DefinitionDict 'DefinitionDict'

A single definition dict representing all the data(and extra def dicts).

Source code in hed/models/tabular_input.py
def get_def_dict(self, hed_schema, extra_def_dicts=None) -> 'DefinitionDict':
    """ Return the definition dict for this file.

    Delegates to the attached sidecar when there is one, otherwise to the base class.

    Parameters:
        hed_schema (HedSchema): Used to identify tags to find definitions.
        extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list.

    Returns:
        DefinitionDict:   A single definition dict representing all the data(and extra def dicts).
    """
    if not self._sidecar:
        return super().get_def_dict(hed_schema, extra_def_dicts)
    return self._sidecar.get_def_dict(hed_schema, extra_def_dicts)

get_sidecar

get_sidecar() -> Union[Sidecar, None]

Return the sidecar associated with this TabularInput.

Source code in hed/models/tabular_input.py
def get_sidecar(self) -> Union[Sidecar, None]:
    """Return the sidecar associated with this TabularInput.

    Returns:
        Union[Sidecar, None]: The value stored in self._sidecar.
    """
    return self._sidecar

reset_column_mapper

reset_column_mapper(sidecar=None)

Change the sidecars and settings.

Parameters:

Name Type Description Default
sidecar str or [str] or Sidecar or [Sidecar]

A list of json filenames to pull sidecar info from.

None
Source code in hed/models/tabular_input.py
def reset_column_mapper(self, sidecar=None):
    """ Replace the sidecar and rebuild the column mapper.

    Parameters:
        sidecar (str or FileLike or list or Sidecar or None): A Sidecar, or a source that the Sidecar
            constructor accepts.  A falsy value removes the sidecar.

    """
    # Coerce non-Sidecar sources into a Sidecar.  Otherwise a raw filename would be stored in
    # self._sidecar and later calls that invoke Sidecar methods on it would fail.
    if sidecar and not isinstance(sidecar, Sidecar):
        sidecar = Sidecar(sidecar)
    new_mapper = ColumnMapper(sidecar=sidecar, optional_tag_columns=[self.HED_COLUMN_NAME])
    self._sidecar = sidecar

    self.reset_mapper(new_mapper)

SpreadsheetInput

SpreadsheetInput

Bases: BaseInput

A spreadsheet of HED tags.

Source code in hed/models/spreadsheet_input.py
class SpreadsheetInput(BaseInput):
    """ A spreadsheet of HED tags. """

    def __init__(self, file=None, file_type=None, worksheet_name=None, tag_columns=None,
                 has_column_names=True, column_prefix_dictionary=None,
                 name=None):
        """Constructor for the SpreadsheetInput class.

        Parameters:
            file (str or file like): An xlsx/tsv file to open or a File object.
            file_type (str or None): ".xlsx" for Excel, ".tsv" or ".txt" for tab-separated data.
            worksheet_name (str or None): The name of the Excel workbook worksheet that contains the HED tags.
                Not applicable to tsv files. If omitted for Excel, the first worksheet is assumed.
            tag_columns (list): A list of ints or strs containing the columns that contain the HED tags.
                If ints then column numbers with [1] indicating only the second column has tags.
            has_column_names (bool): True if the first row of the file holds column names.
            column_prefix_dictionary (dict or None): Dictionary with keys that are column numbers/names and
                values are HED tag prefixes to prepend to the tags in that column before processing.
            name (str or None): Forwarded to BaseInput as the name used when reporting on this file.

        Notes:
            - If file is a string, file_type is derived from file and this parameter is ignored.
            - column_prefix_dictionary may be deprecated/renamed.  These are no longer prefixes,
              but rather converted to value columns.
              e.g. {"key": "Description", 1: "Label/"} will turn into value columns as
              {"key": "Description/#", 1: "Label/#"}
              It will be a validation issue if column 1 is called "key" in the above example.
              This means it no longer accepts anything but the value portion only in the columns.

        :raises HedFileError:
            - The file is blank.
            - An invalid dataframe was passed with size 0.
            - An invalid extension was provided.
            - A duplicate or empty column name appears.
            - Cannot open the indicated file.
            - The specified worksheet name does not exist.
        """
        self.tag_columns = tag_columns

        spreadsheet_mapper = ColumnMapper(
            tag_columns=tag_columns,
            column_prefix_dictionary=column_prefix_dictionary,
            warn_on_missing_column=False,
        )

        super().__init__(file, file_type=file_type, worksheet_name=worksheet_name,
                         has_column_names=has_column_names, mapper=spreadsheet_mapper, name=name)

BaseInput

BaseInput

Superclass representing a basic columnar file.

Source code in hed/models/base_input.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
class BaseInput:
    """ Superclass representing a basic columnar file. """

    TEXT_EXTENSION = ['.tsv', '.txt']
    EXCEL_EXTENSION = ['.xlsx']

    def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=True, mapper=None, name=None,
                 allow_blank_names=True):
        """ Constructor for the BaseInput class.

        Parameters:
            file (str or file-like or pd.Dataframe): An xlsx/tsv file to open.
            file_type (str or None): ".xlsx" (Excel), ".tsv" or ".txt" (tab-separated text).
                Derived from file if file is a filename.  Ignored if pandas dataframe.
            worksheet_name (str or None): Name of Excel workbook worksheet name to use.
                (Not applicable to tsv files.)
            has_column_names (bool): True if file has column names.
                This value is ignored if you pass in a pandas dataframe.
            mapper (ColumnMapper or None):  Indicates which columns have HED tags.
                A default ColumnMapper is created when None.
            name (str or None): Optional field for how this file will report errors.
                Defaults to the filename when file is a string.
            allow_blank_names(bool): If True, column names can be blank

        :raises HedFileError:
            - file is blank.
            - An invalid dataframe was passed with size 0.
            - An invalid extension was provided.
            - A duplicate or empty column name appears.
            - Cannot open the indicated file.
            - The specified worksheet name does not exist.
            - If the sidecar file or tabular file had invalid format and could not be read.

         """
        mapper = ColumnMapper() if mapper is None else mapper

        self._mapper = mapper
        self._has_column_names = has_column_names
        self._name = name
        self._worksheet_name = worksheet_name
        self._dataframe = None
        # Holds the loaded workbook when the data originally came from an Excel file.
        self._loaded_workbook = None

        input_type = file_type
        if isinstance(file, str):
            # A filename supplies both the extension (when no file_type was given) and the default name.
            if file_type is None:
                input_type = os.path.splitext(file)[1]
            if self.name is None:
                self._name = file

        self._open_dataframe_file(file, has_column_names, input_type)

        name_issues = ColumnMapper.check_for_blank_names(self.columns, allow_blank_names=allow_blank_names)
        if name_issues:
            raise HedFileError(HedExceptions.BAD_COLUMN_NAMES, "Duplicate or blank columns found. See issues.",
                               self.name, issues=name_issues)

        # Now that the dataframe is loaded, bind the mapper to its columns.
        self.reset_mapper(mapper)

    def reset_mapper(self, new_mapper):
        """ Set mapper to a different view of the file.

        Parameters:
            new_mapper (ColumnMapper): A column mapper to be associated with this base input.
        """
        self._mapper = new_mapper
        if not self._mapper:
            self._mapper = ColumnMapper()

        if self._dataframe is not None and self._has_column_names:
            columns = self._dataframe.columns
            self._mapper.set_column_map(columns)

    @property
    def dataframe(self):
        """ The underlying dataframe.

        Returns the stored object itself, not a copy (None until a file or dataframe has been loaded).
        """
        return self._dataframe

    @property
    def dataframe_a(self) ->pd.DataFrame:
        """Return the assembled dataframe (the result of assemble() with default arguments).

        The name is probably a placeholder.

        Returns:
            pd.Dataframe: The assembled dataframe.
        """
        return self.assemble()

    @property
    def series_a(self) ->pd.Series:
        """Return the assembled dataframe as a series.

        Calls assemble() and then merges the columns of each row with combine_dataframe().

        Returns:
            pd.Series: the assembled dataframe with columns merged.
        """

        return self.combine_dataframe(self.assemble())

    @property
    def series_filtered(self) -> Union[pd.Series, None]:
        """Return the assembled dataframe as a series, with rows that have the same onset combined.

        Returns:
            Union[pd.Series, None] the assembled dataframe with columns merged, and the rows filtered together.
        """
        if self.onsets is not None:
            return filter_series_by_onset(self.series_a, self.onsets)
        return None

    @property
    def onsets(self):
        """Return the onset column if it exists. """
        if "onset" in self.columns:
            return self._dataframe["onset"]
        return None

    @property
    def needs_sorting(self) -> bool:
        """Return True if this both has an onset column, and it needs sorting."""
        onsets = self.onsets
        if onsets is not None:
            onsets = pd.to_numeric(self.dataframe['onset'], errors='coerce')
            return not onsets.is_monotonic_increasing
        else:
            return False

    @property
    def name(self) -> str:
        """ Name of the data (the value stored in self._name). """
        return self._name

    @property
    def has_column_names(self) -> bool:
        """ True if dataframe has column names (the value stored in self._has_column_names). """
        return self._has_column_names

    @property
    def loaded_workbook(self):
        """ The underlying loaded workbook (the value stored in self._loaded_workbook). """
        return self._loaded_workbook

    @property
    def worksheet_name(self):
        """ The worksheet name (the value stored in self._worksheet_name). """
        return self._worksheet_name

    def convert_to_form(self, hed_schema, tag_form):
        """ Convert all tags in the underlying dataframe to the specified form.

        The work is delegated to hed.models.df_util.convert_to_form, restricted to the mapper's tag columns.

        Parameters:
            hed_schema (HedSchema): The schema to use to convert tags.
            tag_form (str): HedTag property to convert tags to.
                Most cases should use convert_to_short or convert_to_long below.
        """
        from hed.models.df_util import convert_to_form as _convert_frame
        tag_columns = self._mapper.get_tag_columns()
        _convert_frame(self._dataframe, hed_schema, tag_form, tag_columns)

    def convert_to_short(self, hed_schema):
        """ Convert all tags in underlying dataframe to short form.

        Equivalent to convert_to_form(hed_schema, "short_tag").

        Parameters:
            hed_schema (HedSchema): The schema to use to convert tags.

        """
        self.convert_to_form(hed_schema, "short_tag")

    def convert_to_long(self, hed_schema):
        """ Convert all tags in underlying dataframe to long form.

        Equivalent to convert_to_form(hed_schema, "long_tag").

        Parameters:
            hed_schema (HedSchema or None): The schema to use to convert tags.
        """
        self.convert_to_form(hed_schema, "long_tag")

    def shrink_defs(self, hed_schema):
        """ Shrink any def-expand found in the underlying dataframe.

        The work is delegated to hed.models.df_util.shrink_defs, restricted to the mapper's tag columns.

        Parameters:
            hed_schema (HedSchema or None): The schema to use to identify defs.
        """
        # Use the package-qualified path, as convert_to_form does.  A bare "df_util" only
        # resolves if the hed/models directory itself happens to be on sys.path.
        from hed.models.df_util import shrink_defs
        shrink_defs(self._dataframe, hed_schema=hed_schema, columns=self._mapper.get_tag_columns())

    def expand_defs(self, hed_schema, def_dict):
        """ Expand any def tags found in the underlying dataframe.

        The work is delegated to hed.models.df_util.expand_defs, restricted to the mapper's tag columns.

        Parameters:
            hed_schema (HedSchema or None): The schema to use to identify defs.
            def_dict (DefinitionDict): The definitions to expand.
        """
        # Use the package-qualified path, as convert_to_form does.  A bare "df_util" only
        # resolves if the hed/models directory itself happens to be on sys.path.
        from hed.models.df_util import expand_defs
        expand_defs(self._dataframe, hed_schema=hed_schema, def_dict=def_dict, columns=self._mapper.get_tag_columns())

    def to_excel(self, file):
        """ Output to an Excel file.

        Parameters:
            file (str or file-like): Location to save this base input.

        :raises ValueError:
            - If empty file object was passed.

        :raises OSError:
            - Cannot open the indicated file.
        """
        if not file:
            raise ValueError("Empty file name or object passed in to BaseInput.save.")

        dataframe = self._dataframe
        if self._loaded_workbook:
            old_worksheet = self.get_worksheet(self._worksheet_name)
            # Excel spreadsheets are 1 based, then add another 1 for column names if present
            adj_row_for_col_names = 1
            if self._has_column_names:
                adj_row_for_col_names += 1
            adj_for_one_based_cols = 1
            for row_number, text_file_row in dataframe.iterrows():
                for column_number, column_text in enumerate(text_file_row):
                    cell_value = dataframe.iloc[row_number, column_number]
                    old_worksheet.cell(row_number + adj_row_for_col_names,
                                       column_number + adj_for_one_based_cols).value = cell_value

            self._loaded_workbook.save(file)
        else:
            dataframe.to_excel(file, header=self._has_column_names)

    def to_csv(self, file=None):
        """ Write to file or return as a string.

        Parameters:
            file (str, file-like, or None): Location to save this file. If None, return as string.
        Returns:
            None or str:  None if file is given or the contents as a str if file is None.

        :raises OSError:
            - Cannot open the indicated file.
        """
        dataframe = self._dataframe
        csv_string_if_filename_none = dataframe.to_csv(file, sep='\t', index=False, header=self._has_column_names)
        return csv_string_if_filename_none

    @property
    def columns(self):
        """ Returns a list of the column names.

            Empty if no column names.

        Returns:
            columns(list): The column names.
        """
        columns = []
        if self._dataframe is not None and self._has_column_names:
            columns = list(self._dataframe.columns)
        return columns

    def column_metadata(self) -> dict[int, 'ColumnMeta']:
        """ Return the metadata for each column.

        Returns:
            dict[int, 'ColumnMeta']: Number/ColumnMeta pairs.
        """
        if self._mapper:
            return self._mapper._final_column_map
        return {}

    def set_cell(self, row_number, column_number, new_string_obj, tag_form="short_tag"):
        """ Replace the specified cell with transformed text.

        Parameters:
            row_number (int):    The row number of the spreadsheet to set.
            column_number (int): The column number of the spreadsheet to set.
            new_string_obj (HedString): Object with text to put in the given cell.
            tag_form (str): Version of the tags (short_tag, long_tag, base_tag, etc.)

        Notes:
             Any attribute of a HedTag that returns a string is a valid value of tag_form.

        :raises ValueError:
            - There is not a loaded dataframe.

        :raises KeyError:
            - The indicated row/column does not exist.

        :raises AttributeError:
            - The indicated tag_form is not an attribute of HedTag.
        """
        if self._dataframe is None:
            raise ValueError("No data frame loaded")

        new_text = new_string_obj.get_as_form(tag_form)
        self._dataframe.iloc[row_number, column_number] = new_text

    def get_worksheet(self, worksheet_name=None) -> Union[openpyxl.workbook.Workbook, None]:
        """ Get the requested worksheet.

        Parameters:
            worksheet_name (str or None): The name of the requested worksheet by name or the first one if None.

        Returns:
            Union[openpyxl.workbook.Workbook, None]: The workbook request.

        Notes:
            If None, returns the first worksheet.

        :raises KeyError:
            - The specified worksheet name does not exist.
        """
        if worksheet_name and self._loaded_workbook:
            # return self._loaded_workbook.get_sheet_by_name(worksheet_name)
            return self._loaded_workbook[worksheet_name]
        elif self._loaded_workbook:
            return self._loaded_workbook.worksheets[0]
        else:
            return None

    @staticmethod
    def _get_dataframe_from_worksheet(worksheet, has_headers) -> pd.DataFrame:
        """ Create a dataframe from the worksheet.

        Parameters:
            worksheet (Worksheet): The loaded worksheet to convert.
            has_headers (bool): True if this worksheet has column headers.

        Returns:
            pd.DataFrame: The converted data frame.

        """
        if has_headers:
            data = worksheet.values
            # first row is columns
            cols = next(data)
            data = list(data)
            return pd.DataFrame(data, columns=cols, dtype=str)
        else:
            return pd.DataFrame(worksheet.values, dtype=str)

    def validate(self, hed_schema, extra_def_dicts=None, name=None, error_handler=None) -> list[dict]:
        """Validate this file with a SpreadsheetValidator and return all issues found.

        Parameters:
            hed_schema (HedSchema): The schema to use for validation.
            extra_def_dicts (list of DefDict or DefDict): All definitions to use for validation.
            name (str): The name to report errors from this file as.  Defaults to this input's name.
            error_handler (ErrorHandler): Error context passed through to the validator.

        Returns:
            list[dict]: The issues returned by the validator.
        """
        from hed.validator.spreadsheet_validator import SpreadsheetValidator

        report_name = name if name else self.name
        validator = SpreadsheetValidator(hed_schema)
        # The definitions come from the column mapper, combined with any extra def dicts.
        definitions = self._mapper.get_def_dict(hed_schema, extra_def_dicts)
        return validator.validate(self, definitions, report_name, error_handler=error_handler)

    @staticmethod
    def _dataframe_has_names(dataframe) -> bool:
        for column in dataframe.columns:
            if isinstance(column, str):
                return True
        return False

    def assemble(self, mapper=None, skip_curly_braces=False) ->pd.DataFrame:
        """ Assembles the HED strings.

        Parameters:
            mapper (ColumnMapper or None): Generally pass none here unless you want special behavior.
            skip_curly_braces (bool): If True, don't plug in curly brace values into columns.
        Returns:
            pd.Dataframe: The assembled dataframe.
        """
        if mapper is None:
            mapper = self._mapper

        all_columns = self._handle_transforms(mapper)
        if skip_curly_braces:
            return all_columns
        transformers, _ = mapper.get_transformers()
        refs = self.get_column_refs()
        column_names = list(transformers)
        return _handle_curly_braces_refs(all_columns, refs, column_names)

    def _handle_transforms(self, mapper) -> pd.DataFrame:
        """ Apply transformations to the dataframe using the provided mapper.

        Parameters:
            mapper: The column mapper object containing transformation functions.

        Returns:
            pd.DataFrame: The transformed dataframe with all transformations applied.

        Notes:
            - Handles categorical column conversions before and after transformations
            - Returns original dataframe if no transformers are defined
            - Categorical columns are temporarily converted to 'category' type for processing
              then converted back to 'str' type after transformation
        """
        transformers, need_categorical = mapper.get_transformers()
        if transformers:
            all_columns = self._dataframe
            if need_categorical:
                all_columns[need_categorical] = all_columns[need_categorical].astype('category')

            all_columns = all_columns.transform(transformers)

            if need_categorical:
                all_columns[need_categorical] = all_columns[need_categorical].astype('str')
        else:
            all_columns = self._dataframe

        return all_columns

    @staticmethod
    def combine_dataframe(dataframe) ->pd.Series:
        """ Combine all columns in the given dataframe into a single HED string series,
            skipping empty columns and columns with empty strings.

        Parameters:
            dataframe (pd.Dataframe): The dataframe to combine

        Returns:
            pd.Series: The assembled series.
        """
        dataframe = dataframe.apply(
            lambda x: ', '.join(filter(lambda e: bool(e) and e != "n/a", map(str, x))),
            axis=1
        )
        return dataframe

    def get_def_dict(self, hed_schema, extra_def_dicts=None) -> 'DefinitionDict':
        """ Return the definition dict for this file.

        Note: The base class builds the result from extra_def_dicts only.

        Parameters:
            hed_schema (HedSchema): Identifies tags to find definitions(if needed).
            extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list.

        Returns:
            DefinitionDict:   A single definition dict representing all the data(and extra def dicts).
        """
        from hed.models.definition_dict import DefinitionDict

        combined_defs = DefinitionDict(extra_def_dicts, hed_schema)
        return combined_defs

    def get_column_refs(self) -> list:
        """ Return a list of column refs for this file.

            The base class has no source of column refs, so it always returns an empty list.

        Returns:
           list: A list of unique column refs found (always empty here).
        """
        return []

    def _open_dataframe_file(self, file, has_column_names, input_type):
        """ Load data from various file types into the internal DataFrame.

        This method handles loading data from different file formats including Excel files,
        text files (TSV/CSV), and existing pandas DataFrames. It sets the _dataframe property
        and handles appropriate type conversions and error handling for each file type.

        Parameters:
            file (str, file-like, or pd.DataFrame): The input data source.
                - str: File path to load from
                - file-like: File object to read from
                - pd.DataFrame: Existing DataFrame to use directly
            has_column_names (bool): Whether the file contains column headers.
                Used to determine pandas header parameter for text files.
            input_type (str): File extension indicating the file type.
                Supported types: '.xlsx' (Excel), '.tsv', '.txt' (tab-separated text).

        Raises:
            HedFileError:
                - If file is empty or None (FILE_NOT_FOUND)
                - If unsupported file extension provided (INVALID_EXTENSION)
                - If file loading fails due to format issues (INVALID_FILE_FORMAT)

        Notes:
            - For DataFrame input: Converts to string type and auto-detects column names
            - For Excel files: Loads workbook and converts specified worksheet to DataFrame
            - For text files: Uses pandas read_csv with tab delimiter and handles empty files
            - All loaded data is converted to string type for consistency
            - NaN values in text files are replaced with "n/a"
        """
        pandas_header = 0 if has_column_names else None

        # If file is already a DataFrame
        if isinstance(file, pd.DataFrame):
            self._dataframe = file.astype(str)
            self._has_column_names = self._dataframe_has_names(self._dataframe)
            return

        # Check for empty file or None
        if not file:
            raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file specification passed to BaseInput.", file)

        # Handle Excel file input
        if input_type in self.EXCEL_EXTENSION:
            self._load_excel_file(file, has_column_names)
            return

        # Handle unsupported file extensions
        if input_type not in self.TEXT_EXTENSION:
            raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unsupported file extension for text files.",
                               self.name)

        # Handle text file input (CSV/TSV)
        self._load_text_file(file, pandas_header)

    def _load_excel_file(self, file, has_column_names):
        """ Read an Excel workbook and store one of its worksheets as the internal dataframe.

        Parameters:
            file (str or file-like): Path to the Excel file or file-like object to load.
            has_column_names (bool): Passed through to _get_dataframe_from_worksheet to say
                whether the first worksheet row holds column names.

        Raises:
            HedFileError: (INVALID_FILE_FORMAT) if any step of the load raises. The original
                exception is chained as the cause.

        Notes:
            - The workbook is opened with openpyxl and kept in self._loaded_workbook.
            - The worksheet is looked up through get_worksheet using self._worksheet_name.
        """
        try:
            workbook = openpyxl.load_workbook(file)
            # get_worksheet reads self._loaded_workbook, so store it before the lookup.
            self._loaded_workbook = workbook
            worksheet = self.get_worksheet(self._worksheet_name)
            self._dataframe = self._get_dataframe_from_worksheet(worksheet, has_column_names)
        except Exception as err:
            raise HedFileError(HedExceptions.INVALID_FILE_FORMAT,
                               f"Failed to load Excel file: {str(err)}", self.name) from err

    def _load_text_file(self, file, pandas_header):
        """ Load a text file (TSV/CSV) into a pandas DataFrame.

        This method handles loading tab-separated value files and other text-based
        formats using pandas read_csv. It includes special handling for empty files,
        proper NaN value replacement, and comprehensive error handling.

        Parameters:
            file (str or file-like): Path to the text file or file-like object to load.
                Can be any format supported by pandas read_csv with tab delimiter.
            pandas_header (int or None): Row number to use as column headers.
                - 0: First row contains headers
                - None: No header row, generate default column names

        Raises:
            HedFileError: If loading fails due to file format issues, encoding problems,
                or any other pandas-related errors. The original exception is chained
                for debugging purposes.

        Notes:
            - Uses tab delimiter for parsing (appropriate for .tsv files)
            - Handles empty files by creating an empty DataFrame
            - Converts all data to string type for consistency
            - Replaces NaN values with "n/a" for consistent handling
            - Skips blank lines during parsing
            - Uses specific na_values configuration ("", "null")
            - Handles pandas.errors.EmptyDataError for files with no data
        """
        if isinstance(file, str) and os.path.exists(file) and os.path.getsize(file) == 0:
            self._dataframe = pd.DataFrame()  # Handle empty file
            return

        try:
            self._dataframe = pd.read_csv(file, delimiter='\t', header=pandas_header, skip_blank_lines=True,
                                          dtype=str, keep_default_na=True, na_values=("", "null"))
            # Replace NaN values with a known value
            self._dataframe = self._dataframe.fillna("n/a")
        except pd.errors.EmptyDataError:
            self._dataframe = pd.DataFrame()  # Handle case where file has no data
        except Exception as e:
            raise HedFileError(HedExceptions.INVALID_FILE_FORMAT, f"Failed to load text file: {str(e)}",
                               self.name) from e

columns property

columns

Returns a list of the column names.

Empty if no column names.

Returns:

Name Type Description
columns list

The column names.

dataframe property

dataframe

The underlying dataframe.

dataframe_a property

dataframe_a: DataFrame

Return the assembled dataframe. (The property name is probably a placeholder.)

Returns:

Type Description
DataFrame

pd.DataFrame: The assembled dataframe.

has_column_names property

has_column_names: bool

True if dataframe has column names.

loaded_workbook property

loaded_workbook

The underlying loaded workbook.

name property

name: str

Name of the data.

needs_sorting property

needs_sorting: bool

Return True if this has an onset column and that column needs sorting.

onsets property

onsets

Return the onset column if it exists.

series_a property

series_a: Series

Return the assembled dataframe as a series.

Returns:

Type Description
Series

pd.Series: the assembled dataframe with columns merged.

series_filtered property

series_filtered: Union[Series, None]

Return the assembled dataframe as a series, with rows that have the same onset combined.

Returns:

Type Description
Union[Series, None]

Union[pd.Series, None] the assembled dataframe with columns merged, and the rows filtered together.

worksheet_name property

worksheet_name

The worksheet name.

assemble

assemble(
    mapper=None, skip_curly_braces=False
) -> pd.DataFrame

Assembles the HED strings.

Parameters:

Name Type Description Default
mapper ColumnMapper or None

Generally pass none here unless you want special behavior.

None
skip_curly_braces bool

If True, don't plug in curly brace values into columns.

False

Returns: pd.Dataframe: The assembled dataframe.

Source code in hed/models/base_input.py
def assemble(self, mapper=None, skip_curly_braces=False) -> pd.DataFrame:
    """ Assemble the HED strings into a dataframe.

    Parameters:
        mapper (ColumnMapper or None): The mapper to apply. If None, this object's own mapper is used.
        skip_curly_braces (bool): If True, return the transformed columns without
            resolving curly brace references.
    Returns:
        pd.DataFrame: The assembled dataframe.
    """
    active_mapper = self._mapper if mapper is None else mapper

    assembled = self._handle_transforms(active_mapper)
    if not skip_curly_braces:
        # Only the transformer names are needed to resolve the references.
        transformers, _ = active_mapper.get_transformers()
        refs = self.get_column_refs()
        return _handle_curly_braces_refs(assembled, refs, list(transformers))
    return assembled

column_metadata

column_metadata() -> dict[int, ColumnMeta]

Return the metadata for each column.

Returns:

Type Description
dict[int, ColumnMeta]

dict[int, 'ColumnMeta']: Number/ColumnMeta pairs.

Source code in hed/models/base_input.py
def column_metadata(self) -> dict[int, 'ColumnMeta']:
    """ Return the per-column metadata held by the current mapper.

    Returns:
        dict[int, 'ColumnMeta']: The mapper's final column map, or an empty dict
            when there is no mapper.
    """
    mapper = self._mapper
    return mapper._final_column_map if mapper else {}

combine_dataframe staticmethod

combine_dataframe(dataframe) -> pd.Series

Combine all columns in the given dataframe into a single HED string series, skipping empty columns and columns with empty strings.

Parameters:

Name Type Description Default
dataframe Dataframe

The dataframe to combine

required

Returns:

Type Description
Series

pd.Series: The assembled series.

Source code in hed/models/base_input.py
@staticmethod
def combine_dataframe(dataframe) ->pd.Series:
    """ Combine all columns in the given dataframe into a single HED string series,
        skipping empty columns and columns with empty strings.

    Parameters:
        dataframe (pd.Dataframe): The dataframe to combine

    Returns:
        pd.Series: The assembled series.
    """
    dataframe = dataframe.apply(
        lambda x: ', '.join(filter(lambda e: bool(e) and e != "n/a", map(str, x))),
        axis=1
    )
    return dataframe

convert_to_form

convert_to_form(hed_schema, tag_form)

Convert all tags in underlying dataframe to the specified form.

Parameters:

Name Type Description Default
hed_schema HedSchema

The schema to use to convert tags.

required
tag_form str

HedTag property to convert tags to. Most cases should use convert_to_short or convert_to_long below.

required
Source code in hed/models/base_input.py
def convert_to_form(self, hed_schema, tag_form):
    """ Convert the tags in the underlying dataframe to the requested form.

    Parameters:
        hed_schema (HedSchema): The schema to use to convert tags.
        tag_form (str): HedTag property to convert tags to.
            Most cases should use convert_to_short or convert_to_long below.

    Notes:
        - Only the tag columns reported by the current mapper are passed for conversion.
    """
    # Aliased so the helper is not confused with this method of the same name.
    from hed.models.df_util import convert_to_form as convert_frame
    tag_columns = self._mapper.get_tag_columns()
    convert_frame(self._dataframe, hed_schema, tag_form, tag_columns)

convert_to_long

convert_to_long(hed_schema)

Convert all tags in underlying dataframe to long form.

Parameters:

Name Type Description Default
hed_schema HedSchema or None

The schema to use to convert tags.

required
Source code in hed/models/base_input.py
def convert_to_long(self, hed_schema):
    """ Convert the tags in the underlying dataframe to long form.

    Delegates to convert_to_form with the "long_tag" form.

    Parameters:
        hed_schema (HedSchema or None): The schema to use to convert tags.
    """
    target_form = "long_tag"
    self.convert_to_form(hed_schema, target_form)

convert_to_short

convert_to_short(hed_schema)

Convert all tags in underlying dataframe to short form.

Parameters:

Name Type Description Default
hed_schema HedSchema

The schema to use to convert tags.

required
Source code in hed/models/base_input.py
def convert_to_short(self, hed_schema):
    """ Convert the tags in the underlying dataframe to short form.

    Delegates to convert_to_form with the "short_tag" form.

    Parameters:
        hed_schema (HedSchema): The schema to use to convert tags.
    """
    target_form = "short_tag"
    self.convert_to_form(hed_schema, target_form)

expand_defs

expand_defs(hed_schema, def_dict)

Expands any def tags found in the underlying dataframe.

Parameters:

Name Type Description Default
hed_schema HedSchema or None

The schema to use to identify defs.

required
def_dict DefinitionDict

The definitions to expand.

required
Source code in hed/models/base_input.py
def expand_defs(self, hed_schema, def_dict):
    """ Expand any def tags found in the underlying dataframe.

    Parameters:
        hed_schema (HedSchema or None): The schema to use to identify defs.
        def_dict (DefinitionDict): The definitions to expand.

    Notes:
        - Only the tag columns reported by the current mapper are passed for expansion.
    """
    # Import through the package path, as convert_to_form does. A bare ``df_util`` only
    # resolves when hed/models itself happens to be on sys.path.
    from hed.models.df_util import expand_defs
    expand_defs(self._dataframe, hed_schema=hed_schema, def_dict=def_dict, columns=self._mapper.get_tag_columns())

get_column_refs

get_column_refs() -> list

Return a list of column refs for this file.

Default implementation returns empty list.

Returns:

Name Type Description
list list

A list of unique column refs found.

Source code in hed/models/base_input.py
def get_column_refs(self) -> list:
    """ Return the column references used by this file.

    This default implementation reports no column references.

    Returns:
        list: A new, empty list.
    """
    no_refs = []
    return no_refs

get_def_dict

get_def_dict(
    hed_schema, extra_def_dicts=None
) -> DefinitionDict

Return the definition dict for this file.

Note: Baseclass implementation returns just extra_def_dicts.

Parameters:

Name Type Description Default
hed_schema HedSchema

Identifies tags to find definitions(if needed).

required
extra_def_dicts list, DefinitionDict, or None

Extra dicts to add to the list.

None

Returns:

Name Type Description
DefinitionDict DefinitionDict

A single definition dict representing all the data(and extra def dicts).

Source code in hed/models/base_input.py
def get_def_dict(self, hed_schema, extra_def_dicts=None) -> 'DefinitionDict':
    """ Build the definition dict for this file.

    Note: This base implementation builds the result from extra_def_dicts only.

    Parameters:
        hed_schema (HedSchema): Identifies tags to find definitions (if needed).
        extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list.

    Returns:
        DefinitionDict: A single definition dict constructed from extra_def_dicts and hed_schema.
    """
    from hed.models.definition_dict import DefinitionDict
    combined = DefinitionDict(extra_def_dicts, hed_schema)
    return combined

get_worksheet

get_worksheet(
    worksheet_name=None,
) -> Union[openpyxl.workbook.Workbook, None]

Get the requested worksheet.

Parameters:

Name Type Description Default
worksheet_name str or None

The name of the requested worksheet by name or the first one if None.

None

Returns:

Type Description
Union[Workbook, None]

Union[openpyxl.workbook.Workbook, None]: The requested worksheet, or None if no workbook is loaded.

Notes

If None, returns the first worksheet.

:raises KeyError: - The specified worksheet name does not exist.

Source code in hed/models/base_input.py
def get_worksheet(self, worksheet_name=None) -> Union[openpyxl.workbook.Workbook, None]:
    """ Look up a worksheet in the loaded workbook.

    Parameters:
        worksheet_name (str or None): Name of the worksheet to fetch. If None (or empty),
            the first worksheet is used.

    Returns:
        Union[openpyxl.workbook.Workbook, None]: The worksheet taken from the loaded workbook,
            or None if no workbook is loaded.

    :raises KeyError:
        - The specified worksheet name does not exist.
    """
    workbook = self._loaded_workbook
    if not workbook:
        return None
    if worksheet_name:
        return workbook[worksheet_name]
    return workbook.worksheets[0]

reset_mapper

reset_mapper(new_mapper)

Set mapper to a different view of the file.

Parameters:

Name Type Description Default
new_mapper ColumnMapper

A column mapper to be associated with this base input.

required
Source code in hed/models/base_input.py
def reset_mapper(self, new_mapper):
    """ Replace the mapper so this input presents a different view of the file.

    Parameters:
        new_mapper (ColumnMapper): A column mapper to be associated with this base input.
            A falsy value is replaced with a default ColumnMapper.

    Notes:
        - When a dataframe is loaded and has column names, those names are handed to the
          mapper through set_column_map.
    """
    self._mapper = new_mapper if new_mapper else ColumnMapper()

    # Without a dataframe that has named columns there is nothing to register.
    if self._dataframe is None or not self._has_column_names:
        return
    self._mapper.set_column_map(self._dataframe.columns)

set_cell

set_cell(
    row_number,
    column_number,
    new_string_obj,
    tag_form="short_tag",
)

Replace the specified cell with transformed text.

Parameters:

Name Type Description Default
row_number int

The row number of the spreadsheet to set.

required
column_number int

The column number of the spreadsheet to set.

required
new_string_obj HedString

Object with text to put in the given cell.

required
tag_form str

Version of the tags (short_tag, long_tag, base_tag, etc.)

'short_tag'
Notes

Any attribute of a HedTag that returns a string is a valid value of tag_form.

:raises ValueError: - There is not a loaded dataframe.

:raises KeyError: - The indicated row/column does not exist.

:raises AttributeError: - The indicated tag_form is not an attribute of HedTag.

Source code in hed/models/base_input.py
def set_cell(self, row_number, column_number, new_string_obj, tag_form="short_tag"):
    """ Overwrite one cell of the dataframe with the text form of a HED string.

    Parameters:
        row_number (int):    The row number of the spreadsheet to set.
        column_number (int): The column number of the spreadsheet to set.
        new_string_obj (HedString): Object with text to put in the given cell.
        tag_form (str): Version of the tags (short_tag, long_tag, base_tag, etc.)

    Notes:
         - Any attribute of a HedTag that returns a string is a valid value of tag_form.
         - Row and column numbers are used as positions in the dataframe (iloc).

    :raises ValueError:
        - There is not a loaded dataframe.

    :raises KeyError:
        - The indicated row/column does not exist.

    :raises AttributeError:
        - The indicated tag_form is not an attribute of HedTag.
    """
    frame = self._dataframe
    if frame is None:
        raise ValueError("No data frame loaded")

    frame.iloc[row_number, column_number] = new_string_obj.get_as_form(tag_form)

shrink_defs

shrink_defs(hed_schema)

Shrinks any def-expand found in the underlying dataframe.

Parameters:

Name Type Description Default
hed_schema HedSchema or None

The schema to use to identify defs.

required
Source code in hed/models/base_input.py
def shrink_defs(self, hed_schema):
    """ Shrink any def-expand found in the underlying dataframe.

    Parameters:
        hed_schema (HedSchema or None): The schema to use to identify defs.

    Notes:
        - Only the tag columns reported by the current mapper are passed for shrinking.
    """
    # Import through the package path, as convert_to_form does. A bare ``df_util`` only
    # resolves when hed/models itself happens to be on sys.path.
    from hed.models.df_util import shrink_defs
    shrink_defs(self._dataframe, hed_schema=hed_schema, columns=self._mapper.get_tag_columns())

to_csv

to_csv(file=None)

Write to file or return as a string.

Parameters:

Name Type Description Default
file str, file-like, or None

Location to save this file. If None, return as string.

None

Returns: None or str: None if file is given or the contents as a str if file is None.

:raises OSError: - Cannot open the indicated file.

Source code in hed/models/base_input.py
def to_csv(self, file=None):
    """ Write the dataframe as tab-separated text, or return that text.

    Parameters:
        file (str, file-like, or None): Location to save this file. If None, return as string.
    Returns:
        None or str:  None if file is given or the contents as a str if file is None.

    Notes:
        - The index is not written; the header row is written only when the data has column names.

    :raises OSError:
        - Cannot open the indicated file.
    """
    return self._dataframe.to_csv(file, sep='\t', index=False, header=self._has_column_names)

to_excel

to_excel(file)

Output to an Excel file.

Parameters:

Name Type Description Default
file str or file-like

Location to save this base input.

required

:raises ValueError: - If empty file object was passed.

:raises OSError: - Cannot open the indicated file.

Source code in hed/models/base_input.py
def to_excel(self, file):
    """ Output to an Excel file.

    Parameters:
        file (str or file-like): Location to save this base input.

    Notes:
        - If a workbook was loaded, the dataframe values are assigned to the matching cells
          of its worksheet and the workbook is saved to file.
        - Otherwise the dataframe is written directly with pandas.

    :raises ValueError:
        - If empty file object was passed.

    :raises OSError:
        - Cannot open the indicated file.
    """
    if not file:
        raise ValueError("Empty file name or object passed in to BaseInput.save.")

    dataframe = self._dataframe
    if not self._loaded_workbook:
        dataframe.to_excel(file, header=self._has_column_names)
        return

    worksheet = self.get_worksheet(self._worksheet_name)
    # Excel rows and columns are 1 based; a header row pushes the data down one more row.
    first_data_row = 2 if self._has_column_names else 1
    first_data_col = 1
    # Walk the rows by position rather than by index label. The label is only a valid
    # position (and worksheet row offset) when the index is the default 0..n-1 range.
    for row_pos, row_values in enumerate(dataframe.itertuples(index=False, name=None)):
        for col_pos, cell_value in enumerate(row_values):
            worksheet.cell(row_pos + first_data_row, col_pos + first_data_col).value = cell_value

    self._loaded_workbook.save(file)

validate

validate(
    hed_schema,
    extra_def_dicts=None,
    name=None,
    error_handler=None,
) -> list[dict]

Creates a SpreadsheetValidator and returns all issues with this file.

Parameters:

Name Type Description Default
hed_schema HedSchema

The schema to use for validation.

required
extra_def_dicts list of DefDict or DefDict

All definitions to use for validation.

None
name str

The name to report errors from this file as.

None
error_handler ErrorHandler

Error context to use. Creates a new one if None.

None

Returns:

Type Description
list[dict]

list[dict]: A list of issues for a HED string.

Source code in hed/models/base_input.py
def validate(self, hed_schema, extra_def_dicts=None, name=None, error_handler=None) -> list[dict]:
    """ Validate this file with a SpreadsheetValidator and return the issues found.

    Parameters:
        hed_schema (HedSchema): The schema to use for validation.
        extra_def_dicts (list of DefDict or DefDict): All definitions to use for validation.
        name (str): The name to report errors from this file as. Defaults to this object's name.
        error_handler (ErrorHandler): Error context to use.  Creates a new one if None.

    Returns:
        list[dict]: The issues reported by the validator.

    Notes:
        - The definitions passed to the validator come from the current mapper's get_def_dict.
    """
    from hed.validator.spreadsheet_validator import SpreadsheetValidator
    report_name = name or self.name
    validator = SpreadsheetValidator(hed_schema)
    definitions = self._mapper.get_def_dict(hed_schema, extra_def_dicts)
    return validator.validate(self, definitions, report_name, error_handler=error_handler)

ColumnMapper

ColumnMapper

Mapping of a base input file columns into HED tags.

Notes
  • All column numbers are 0 based.
Source code in hed/models/column_mapper.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
class ColumnMapper:
    """ Mapping of a base input file columns into HED tags.

    Notes:
        - All column numbers are 0 based.
    """

    def __init__(self, sidecar=None, tag_columns=None, column_prefix_dictionary=None,
                 optional_tag_columns=None, warn_on_missing_column=False):
        """ Constructor for ColumnMapper.

        Parameters:
            sidecar (Sidecar): A sidecar to gather column data from.
            tag_columns: (list):  A list of ints or strings containing the columns that contain the HED tags.
                Sidecar column definitions will take precedent if there is a conflict with tag_columns.
            column_prefix_dictionary (dict): Dictionary with keys that are column numbers/names and values are HED tag
                prefixes to prepend to the tags in that column before processing.
            optional_tag_columns (list): A list of ints or strings containing the columns that contain
                the HED tags. If the column is otherwise unspecified, convert this column type to HEDTags.
            warn_on_missing_column (bool): If True, issue mapping warnings on column names that are missing from
                                            the sidecar.

        Notes:
            - All column numbers are 0 based.
            - The column_prefix_dictionary may be deprecated/renamed in the future.
                - These are no longer prefixes, but rather converted to value columns:
                  {"key": "Description", 1: "Label/"} will turn into value columns as
                  {"key": "Description/#", 1: "Label/#"}
                  It will be a validation issue if column 1 is called "key" in the above example.
                  This means it no longer accepts anything but the value portion only in the columns.

        """

        # Maps column number to column_entry.  This is what's actually used by most code.
        self._final_column_map = {}
        self._no_mapping_info = True
        self._column_map = {}
        self._reverse_column_map = {}
        self._warn_on_missing_column = warn_on_missing_column
        if tag_columns is None:
            tag_columns = []
        self._tag_columns = tag_columns
        if optional_tag_columns is None:
            optional_tag_columns = []
        self._optional_tag_columns = optional_tag_columns
        if column_prefix_dictionary is None:
            column_prefix_dictionary = {}
        self._column_prefix_dictionary = column_prefix_dictionary

        self._na_patterns = ["n/a", "nan"]
        self._sidecar = None
        self._set_sidecar(sidecar)

        # finalize the column map based on initial settings with no header
        self._finalize_mapping()

    @property
    def tag_columns(self):
        """ Return the known tag and optional tag columns with numbers as names when possible.

            Returns:
                tag_columns(list of str or int): A list of all tag and optional tag columns as labels.
        """
        joined_list = self._tag_columns + self._optional_tag_columns
        return list(set(self._convert_to_names(self._column_map, joined_list)))

    @property
    def column_prefix_dictionary(self):
        """ Return the column_prefix_dictionary with numbers turned into names where possible.

            Returns:
                column_prefix_dictionary(list of str or int): A column_prefix_dictionary with column labels as keys.
        """
        return self._convert_to_names_dict(self._column_map, self._column_prefix_dictionary)

    def get_transformers(self):
        """ Return the transformers to use on a dataframe.

            Returns:
                tuple(dict, list):
                    dict({str or int: func}): The functions to use to transform each column.
                    need_categorical(list of int): A list of columns to treat as categorical.
        """
        final_transformers = {}
        need_categorical = []
        for column in self._final_column_map.values():
            assign_to_column = column.column_name
            if isinstance(assign_to_column, int):
                if self._column_map:
                    assign_to_column = self._column_map[assign_to_column]
                else:
                    assign_to_column = assign_to_column
            if column.column_type == ColumnType.Ignore:
                continue
            elif column.column_type == ColumnType.Value:
                value_str = column.hed_dict
                from functools import partial
                final_transformers[assign_to_column] = partial(self._value_handler, value_str)
            elif column.column_type == ColumnType.Categorical:
                need_categorical.append(column.column_name)
                category_values = column.hed_dict
                from functools import partial
                final_transformers[assign_to_column] = partial(self._category_handler, category_values)
            else:
                final_transformers[assign_to_column] = lambda x: x

        return final_transformers, need_categorical

    @staticmethod
    def check_for_blank_names(column_map, allow_blank_names) -> list[dict]:
        """ Validate there are no blank column names.

        Parameters:
            column_map (iterable): A list of column names.
            allow_blank_names (bool): Only find issues if True.

        Returns:
            list[dict]: A list of dicts, one per issue.
        """
        # We don't have any checks right now if blank/duplicate is allowed
        if allow_blank_names:
            return []

        issues = []

        for column_number, name in enumerate(column_map):
            if name is None or not name or name.startswith(PANDAS_COLUMN_PREFIX_TO_IGNORE):
                issues += ErrorHandler.format_error(ValidationErrors.HED_BLANK_COLUMN, column_number)
                continue

        return issues

    def _set_sidecar(self, sidecar):
        """ Set the sidecar this column mapper uses.

        Parameters:
            sidecar (Sidecar or None): The sidecar to use.

        :raises ValueError:
            - A sidecar was previously set.
        """
        if self._sidecar:
            raise ValueError("Trying to set a second sidecar on a column mapper.")
        if not sidecar:
            return None

        self._sidecar = sidecar

    @property
    def sidecar_column_data(self):
        """ Pass through to get the sidecar ColumnMetadata.

        Returns:
            dict({str:ColumnMetadata}): The column metadata defined by this sidecar.
        """
        if self._sidecar:
            return self._sidecar.column_data

        return {}

    def get_tag_columns(self):
        """ Return the column numbers or names that are mapped to be HedTags.

            Note: This is NOT the tag_columns or optional_tag_columns parameter, though they set it.

        Returns:
            column_identifiers(list): A list of column numbers or names that are ColumnType.HedTags.
                0-based if integer-based, otherwise column name.
        """
        return [column_entry.column_name for number, column_entry in self._final_column_map.items()
                if column_entry.column_type == ColumnType.HEDTags]

    def set_tag_columns(self, tag_columns=None, optional_tag_columns=None, finalize_mapping=True):
        """ Set tag columns and optional tag columns.

        Parameters:
            tag_columns (list): A list of ints or strings containing the columns that contain the HED tags.
                                If None, clears existing tag_columns
            optional_tag_columns (list): A list of ints or strings containing the columns that contain the HED tags,
                                         but not an error if missing.
                                         If None, clears existing tag_columns
            finalize_mapping (bool): Re-generate the internal mapping if True, otherwise no effect until finalize.
        """
        if tag_columns is None:
            tag_columns = []
        if optional_tag_columns is None:
            optional_tag_columns = []
        self._tag_columns = tag_columns
        self._optional_tag_columns = optional_tag_columns
        if finalize_mapping:
            self._finalize_mapping()

    def set_column_map(self, new_column_map=None) -> list[dict]:
        """ Set the column number to name mapping.

        Parameters:
            new_column_map (list or dict):  Either an ordered list of the column names or column_number:column name.
                dictionary. In both cases, column numbers start at 0.

        Returns:
            list[dict]: List of issues. Each issue is a dictionary.

        """
        if new_column_map is None:
            new_column_map = {}
        if isinstance(new_column_map, dict):
            column_map = new_column_map
        # List like
        else:
            column_map = {column_number: column_name for column_number, column_name in enumerate(new_column_map)}
        self._column_map = column_map
        self._reverse_column_map = {column_name: column_number for column_number, column_name in column_map.items()}
        self._finalize_mapping()

    def set_column_prefix_dictionary(self, column_prefix_dictionary, finalize_mapping=True):
        """Set the column prefix dictionary. """
        self._column_prefix_dictionary = column_prefix_dictionary
        if finalize_mapping:
            self._finalize_mapping()

    @staticmethod
    def _get_sidecar_basic_map(column_map, column_data):
        basic_final_map = {}
        unhandled_cols = []
        if column_map:
            for column_number, column_name in column_map.items():
                if column_name is None:
                    continue
                if column_name in column_data:
                    column_entry = copy.deepcopy(column_data[column_name])
                    column_entry.column_name = column_name
                    basic_final_map[column_name] = column_entry
                    continue
                elif isinstance(column_name, str) and column_name.startswith(PANDAS_COLUMN_PREFIX_TO_IGNORE):
                    continue
                unhandled_cols.append(column_name)

        return basic_final_map, unhandled_cols

    @staticmethod
    def _convert_to_names(column_to_name_map, column_list):
        converted_names = []
        for index in column_list:
            if isinstance(index, int):
                if not column_to_name_map:
                    converted_names.append(index)
                elif index in column_to_name_map:
                    converted_names.append(column_to_name_map[index])
            else:
                if index in column_to_name_map.values():
                    converted_names.append(index)
        return converted_names

    @staticmethod
    def _convert_to_names_dict(column_to_name_map, column_dict):
        converted_dict = {}
        for index, column_data in column_dict.items():
            if isinstance(index, int):
                if not column_to_name_map:
                    converted_dict[index] = column_data
                elif index in column_to_name_map:
                    converted_dict[column_to_name_map[index]] = column_data
            else:
                if index in column_to_name_map.values():
                    converted_dict[index] = column_data
        return converted_dict

    @staticmethod
    def _add_value_columns(final_map, column_prefix_dictionary):
        for col, prefix in column_prefix_dictionary.items():
            if prefix.endswith("/"):
                prefix = prefix + "#"
            else:
                prefix = prefix + "/#"
            new_def = ColumnMetadata(ColumnType.Value, col, source=prefix)
            final_map[col] = new_def

    @staticmethod
    def _add_tag_columns(final_map, tag_columns):
        for col in tag_columns:
            new_def = ColumnMetadata(ColumnType.HEDTags, col)
            final_map[col] = new_def

    def _get_column_lists(self):
        column_lists = self._tag_columns, self._optional_tag_columns, self._column_prefix_dictionary
        list_names = ["tag_columns", "optional_tag_columns", "column_prefix_dictionary"]

        if not any(column for column in column_lists):
            return column_lists, list_names
        # Filter out empty lists from the above
        column_lists, list_names = zip(*[(col_list, list_name) for col_list, list_name in zip(column_lists, list_names)
                                         if col_list])

        return column_lists, list_names

    def _check_for_duplicates_and_required(self, list_names, column_lists) -> list[dict]:
        """ Check for duplicates and required columns in the given lists.
        """
        issues = []
        for list_name, col_list in zip(list_names, column_lists):
            # Convert all known strings to ints, then check for duplicates
            converted_list = [item if isinstance(item, int) else self._reverse_column_map.get(item, item)
                              for item in col_list]

            if col_list != self._optional_tag_columns:
                for test_col in converted_list:
                    if isinstance(test_col, str) and test_col not in self._reverse_column_map:
                        issues += ErrorHandler.format_error(ValidationErrors.HED_MISSING_REQUIRED_COLUMN,
                                                            test_col, list_name)

            issues += self._check_for_duplicates_between_lists(converted_list, list_name,
                                                               ValidationErrors.DUPLICATE_COLUMN_IN_LIST)

        return issues

    def _check_for_duplicates_between_lists(self, checking_list, list_names, error_type):
        issues = []
        duplicates = [item for item, count in Counter(checking_list).items() if count > 1]
        for duplicate in duplicates:
            issues += ErrorHandler.format_error(error_type, duplicate,
                                                self._column_map.get(duplicate), list_names)
        return issues

    def check_for_mapping_issues(self, allow_blank_names=False) -> list[dict]:
        """ Find all issues given the current column_map, tag_columns, etc.

        Parameters:
            allow_blank_names (bool): Only flag blank names if False.

        Returns:
            list[dict]: All issues found as a list of dicts.
        """
        # 1. Gather the column sources that actually have entries.
        column_lists, list_names = self._get_column_lists()

        # 2. Required columns must be present, and no source may list a column twice.
        found = self._check_for_duplicates_and_required(list_names, column_lists)

        # 3. A column may not be both a tag column and a prefix column.
        tag_and_prefix_columns = self.tag_columns + list(self.column_prefix_dictionary)
        found += self._check_for_duplicates_between_lists(tag_and_prefix_columns, list_names,
                                                          ValidationErrors.DUPLICATE_COLUMN_BETWEEN_SOURCES)

        # 4. A sidecar may not be combined with explicit columns, except for the lone "HED" column.
        has_other_columns = bool(tag_and_prefix_columns) and tag_and_prefix_columns != ["HED"]
        if self._sidecar and has_other_columns:
            found += ErrorHandler.format_error(ValidationErrors.SIDECAR_AND_OTHER_COLUMNS,
                                               column_names=tag_and_prefix_columns)

        # 5. Optionally flag header columns that no source accounts for.
        if self._warn_on_missing_column:
            accounted_for = list(self.sidecar_column_data) + tag_and_prefix_columns + NO_WARN_COLUMNS
            for header_name in self._column_map.values():
                if header_name not in accounted_for:
                    found += ErrorHandler.format_error(ValidationErrors.HED_UNKNOWN_COLUMN, header_name)

        found += self.check_for_blank_names(self._column_map.values(), allow_blank_names=allow_blank_names)
        return found

    def _finalize_mapping(self):
        final_map, unhandled_cols = self._get_sidecar_basic_map(self._column_map, self.sidecar_column_data)

        self._add_tag_columns(final_map, self.tag_columns)
        self._remove_from_list(unhandled_cols, self.tag_columns)

        self._add_value_columns(final_map, self.column_prefix_dictionary)
        self._remove_from_list(unhandled_cols, self.column_prefix_dictionary)

        self._final_column_map = dict(sorted(final_map.items()))

    @staticmethod
    def _remove_from_list(list_to_alter, to_remove) -> list:
        return [item for item in list_to_alter if item not in to_remove]

    def get_def_dict(self, hed_schema, extra_def_dicts=None) -> DefinitionDict:
        """ Return def dicts from every column description.

        Parameters:
            hed_schema (Schema): A HED schema object to use for extracting definitions.
            extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list.

        Returns:
           DefinitionDict:   A single definition dict representing all the data(and extra def dicts).
        """
        if self._sidecar:
            return self._sidecar.get_def_dict(hed_schema=hed_schema, extra_def_dicts=extra_def_dicts)

        return DefinitionDict(extra_def_dicts, hed_schema=hed_schema)

    def get_column_mapping_issues(self) -> list[dict]:
        """ Get all the issues with finalizing column mapping(duplicate columns, missing required, etc.).

        Notes:
            - This is deprecated and now a wrapper for "check_for_mapping_issues()".

        Returns:
            list[dict]: A list dictionaries of all issues found from mapping column names to numbers.

        """
        return self.check_for_mapping_issues()

    @staticmethod
    def _category_handler(category_values, x):
        return category_values.get(x, "")

    @staticmethod
    def _value_handler(value_str, x):
        if x == "n/a":
            return "n/a"

        return value_str.replace("#", str(x))

column_prefix_dictionary property

column_prefix_dictionary

Return the column_prefix_dictionary with numbers turned into names where possible.

Returns:

Name Type Description
column_prefix_dictionary dict

A column_prefix_dictionary with column labels as keys.

sidecar_column_data property

sidecar_column_data

Pass through to get the sidecar ColumnMetadata.

Returns:

Name Type Description
dict {str: ColumnMetadata}

The column metadata defined by this sidecar.

tag_columns property

tag_columns

Return the known tag and optional tag columns with numbers as names when possible.

Returns:

Name Type Description
tag_columns list of str or int

A list of all tag and optional tag columns as labels.

check_for_blank_names staticmethod

check_for_blank_names(
    column_map, allow_blank_names
) -> list[dict]

Validate there are no blank column names.

Parameters:

Name Type Description Default
column_map iterable

A list of column names.

required
allow_blank_names bool

If True, the check is skipped and no issues are reported.

required

Returns:

Type Description
list[dict]

list[dict]: A list of dicts, one per issue.

Source code in hed/models/column_mapper.py
@staticmethod
def check_for_blank_names(column_map, allow_blank_names) -> list[dict]:
    """ Validate there are no blank column names.

    Parameters:
        column_map (iterable): The column names, in column order.
        allow_blank_names (bool): If True, skip the check entirely and report no issues.

    Returns:
        list[dict]: A list of dicts, one per blank column found.
    """
    # We don't have any checks right now if blank/duplicate is allowed
    if allow_blank_names:
        return []

    issues = []
    for column_number, name in enumerate(column_map):
        # Only strings can carry PANDAS_COLUMN_PREFIX_TO_IGNORE; guarding on the type keeps
        # non-string names (e.g. integer headers) from raising AttributeError.
        is_placeholder = isinstance(name, str) and name.startswith(PANDAS_COLUMN_PREFIX_TO_IGNORE)
        if not name or is_placeholder:
            issues += ErrorHandler.format_error(ValidationErrors.HED_BLANK_COLUMN, column_number)

    return issues

check_for_mapping_issues

check_for_mapping_issues(
    allow_blank_names=False,
) -> list[dict]

Find all issues given the current column_map, tag_columns, etc.

Parameters:

Name Type Description Default
allow_blank_names bool

Only flag blank names if False.

False

Returns:

Type Description
list[dict]

list[dict]: All issues found as a list of dicts.

Source code in hed/models/column_mapper.py
def check_for_mapping_issues(self, allow_blank_names=False) -> list[dict]:
    """ Find all issues given the current column_map, tag_columns, etc.

    Parameters:
        allow_blank_names (bool): Only flag blank names if False.

    Returns:
        list[dict]: All issues found as a list of dicts.
    """
    # 1. Gather the column sources that actually have entries.
    column_lists, list_names = self._get_column_lists()

    # 2. Required columns must be present, and no source may list a column twice.
    found = self._check_for_duplicates_and_required(list_names, column_lists)

    # 3. A column may not be both a tag column and a prefix column.
    tag_and_prefix_columns = self.tag_columns + list(self.column_prefix_dictionary)
    found += self._check_for_duplicates_between_lists(tag_and_prefix_columns, list_names,
                                                      ValidationErrors.DUPLICATE_COLUMN_BETWEEN_SOURCES)

    # 4. A sidecar may not be combined with explicit columns, except for the lone "HED" column.
    has_other_columns = bool(tag_and_prefix_columns) and tag_and_prefix_columns != ["HED"]
    if self._sidecar and has_other_columns:
        found += ErrorHandler.format_error(ValidationErrors.SIDECAR_AND_OTHER_COLUMNS,
                                           column_names=tag_and_prefix_columns)

    # 5. Optionally flag header columns that no source accounts for.
    if self._warn_on_missing_column:
        accounted_for = list(self.sidecar_column_data) + tag_and_prefix_columns + NO_WARN_COLUMNS
        for header_name in self._column_map.values():
            if header_name not in accounted_for:
                found += ErrorHandler.format_error(ValidationErrors.HED_UNKNOWN_COLUMN, header_name)

    found += self.check_for_blank_names(self._column_map.values(), allow_blank_names=allow_blank_names)
    return found

get_column_mapping_issues

get_column_mapping_issues() -> list[dict]

Get all the issues with finalizing column mapping(duplicate columns, missing required, etc.).

Notes
  • This is deprecated and now a wrapper for "check_for_mapping_issues()".

Returns:

Type Description
list[dict]

list[dict]: A list of dictionaries of all issues found from mapping column names to numbers.

Source code in hed/models/column_mapper.py
def get_column_mapping_issues(self) -> list[dict]:
    """ Get all the issues with finalizing column mapping(duplicate columns, missing required, etc.).

    Notes:
        - This is deprecated and now a wrapper for "check_for_mapping_issues()", called with its defaults.

    Returns:
        list[dict]: A list of dictionaries of all issues found from mapping column names to numbers.

    """
    mapping_issues = self.check_for_mapping_issues()
    return mapping_issues

get_def_dict

get_def_dict(
    hed_schema, extra_def_dicts=None
) -> DefinitionDict

Return def dicts from every column description.

Parameters:

Name Type Description Default
hed_schema Schema

A HED schema object to use for extracting definitions.

required
extra_def_dicts list, DefinitionDict, or None

Extra dicts to add to the list.

None

Returns:

Name Type Description
DefinitionDict DefinitionDict

A single definition dict representing all the data(and extra def dicts).

Source code in hed/models/column_mapper.py
def get_def_dict(self, hed_schema, extra_def_dicts=None) -> DefinitionDict:
    """ Return the definitions available to this mapper.

    Parameters:
        hed_schema (Schema): A HED schema object to use for extracting definitions.
        extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list.

    Returns:
       DefinitionDict:   The sidecar's definition dict when a sidecar is set; otherwise a
                         DefinitionDict built from extra_def_dicts alone.
    """
    sidecar = self._sidecar
    if not sidecar:
        return DefinitionDict(extra_def_dicts, hed_schema=hed_schema)

    return sidecar.get_def_dict(hed_schema=hed_schema, extra_def_dicts=extra_def_dicts)

get_tag_columns

get_tag_columns()

Return the column numbers or names that are mapped to be HedTags.

Note: This is NOT the tag_columns or optional_tag_columns parameter, though they set it.

Returns:

Name Type Description
column_identifiers list

A list of column numbers or names that are ColumnType.HedTags. 0-based if integer-based, otherwise column name.

Source code in hed/models/column_mapper.py
def get_tag_columns(self):
    """ Return the identifiers of the columns whose final mapping type is HEDTags.

        Note: This is NOT the tag_columns or optional_tag_columns parameter, though they set it.

    Returns:
        column_identifiers(list): The column_name of every ColumnType.HEDTags entry in the final map.
            0-based if integer-based, otherwise column name.
    """
    return [entry.column_name for entry in self._final_column_map.values()
            if entry.column_type == ColumnType.HEDTags]

get_transformers

get_transformers()

Return the transformers to use on a dataframe.

Returns:

Name Type Description
tuple (dict, list)

dict({str or int: func}): The functions to use to transform each column. need_categorical(list of int): A list of columns to treat as categorical.

Source code in hed/models/column_mapper.py
def get_transformers(self):
    """ Build the per-column transform functions to use on a dataframe.

        Returns:
            tuple(dict, list):
                dict({str or int: func}): The function to apply to each column, keyed by column name
                    (or by column number when no column map has been set).
                need_categorical(list): The column_name of every categorical column entry.
    """
    from functools import partial

    transformers = {}
    categorical_columns = []
    for entry in self._final_column_map.values():
        target = entry.column_name
        # Integer identifiers are translated to header names once a column map is known.
        if isinstance(target, int) and self._column_map:
            target = self._column_map[target]

        kind = entry.column_type
        if kind == ColumnType.Ignore:
            continue
        if kind == ColumnType.Value:
            # Bind the entry's hed_dict so the handler only needs the cell value.
            transformers[target] = partial(self._value_handler, entry.hed_dict)
        elif kind == ColumnType.Categorical:
            categorical_columns.append(entry.column_name)
            transformers[target] = partial(self._category_handler, entry.hed_dict)
        else:
            # Every other column type is passed through unchanged.
            transformers[target] = lambda x: x

    return transformers, categorical_columns

set_column_map

set_column_map(new_column_map=None) -> list[dict]

Set the column number to name mapping.

Parameters:

Name Type Description Default
new_column_map list or dict

Either an ordered list of the column names or a {column_number: column_name} dictionary. In both cases, column numbers start at 0.

None

Returns:

Type Description
None

The method returns nothing; call check_for_mapping_issues() to obtain the issues for the resulting mapping.

Source code in hed/models/column_mapper.py
def set_column_map(self, new_column_map=None) -> None:
    """ Set the column number to name mapping and rebuild the final mapping.

    Parameters:
        new_column_map (list or dict):  Either an ordered list of the column names or a
            {column_number: column_name} dictionary. In both cases, column numbers start at 0.
            None is treated as an empty mapping.

    Notes:
        - Nothing is returned. Use check_for_mapping_issues() to retrieve issues with the resulting mapping.
        - A dictionary argument is stored as-is (not copied).

    """
    if new_column_map is None:
        new_column_map = {}
    if isinstance(new_column_map, dict):
        column_map = new_column_map
    else:
        # List like: positions become the 0-based column numbers.
        column_map = dict(enumerate(new_column_map))
    self._column_map = column_map
    # If a name occurs in more than one column, the last column number wins in the reverse map.
    self._reverse_column_map = {column_name: column_number for column_number, column_name in column_map.items()}
    self._finalize_mapping()

set_column_prefix_dictionary

set_column_prefix_dictionary(
    column_prefix_dictionary, finalize_mapping=True
)

Set the column prefix dictionary.

Source code in hed/models/column_mapper.py
def set_column_prefix_dictionary(self, column_prefix_dictionary, finalize_mapping=True):
    """ Replace the column prefix dictionary.

    Parameters:
        column_prefix_dictionary (dict): The new dictionary; stored as-is.
        finalize_mapping (bool): Re-generate the internal mapping if True.
    """
    self._column_prefix_dictionary = column_prefix_dictionary
    if not finalize_mapping:
        return
    self._finalize_mapping()

set_tag_columns

set_tag_columns(
    tag_columns=None,
    optional_tag_columns=None,
    finalize_mapping=True,
)

Set tag columns and optional tag columns.

Parameters:

Name Type Description Default
tag_columns list

A list of ints or strings containing the columns that contain the HED tags. If None, clears existing tag_columns

None
optional_tag_columns list

A list of ints or strings containing the columns that contain the HED tags, but not an error if missing. If None, clears existing optional_tag_columns.

None
finalize_mapping bool

Re-generate the internal mapping if True, otherwise no effect until finalize.

True
Source code in hed/models/column_mapper.py
def set_tag_columns(self, tag_columns=None, optional_tag_columns=None, finalize_mapping=True):
    """ Replace the tag columns and the optional tag columns.

    Parameters:
        tag_columns (list): A list of ints or strings containing the columns that contain the HED tags.
                            If None, clears existing tag_columns.
        optional_tag_columns (list): A list of ints or strings containing the columns that contain the HED tags,
                                     but not an error if missing.
                                     If None, clears existing optional_tag_columns.
        finalize_mapping (bool): Re-generate the internal mapping if True, otherwise no effect until finalize.
    """
    self._tag_columns = [] if tag_columns is None else tag_columns
    self._optional_tag_columns = [] if optional_tag_columns is None else optional_tag_columns
    if not finalize_mapping:
        return
    self._finalize_mapping()

DefinitionDict

DefinitionDict

Gathers definitions from a single source.

Source code in hed/models/definition_dict.py
class DefinitionDict:
    """ Gathers definitions from a single source.

    Entries are kept in ``self.defs``. check_for_definitions stores each entry under the casefolded
    definition name, which is also the key that get() and get_definition_entry() look up.
    """

    def __init__(self, def_dicts=None, hed_schema=None):
        """ Definitions to be considered a single source.

        Parameters:
            def_dicts (str or list or DefinitionDict): DefDict or list of DefDicts/strings or
                a single string whose definitions should be added.
            hed_schema (HedSchema or None): Required if passing strings or lists of strings, unused otherwise.

        :raises TypeError:
            - Bad type passed as def_dicts.
            - Strings passed as def_dicts without a hed_schema.
        """

        self.defs = {}
        self._issues = []
        # Falsy input (None, empty string, empty list or dict) adds nothing.
        if def_dicts:
            self.add_definitions(def_dicts, hed_schema)

    def add_definitions(self, def_dicts, hed_schema=None):
        """ Add definitions from dict(s) or string(s) to this dict.

        Parameters:
            def_dicts (list, DefinitionDict, dict, or str): DefinitionDict or list of DefinitionDicts/strings/dicts
                                                            whose definitions should be added.
            hed_schema (HedSchema or None): Required if passing strings or lists of strings, unused otherwise.

        Notes:
            - dict form expects DefinitionEntries in the same form as a DefinitionDict.
            - str or list of strings will parse the strings using the hed_schema.
            - You can mix and match types, e.g. [DefinitionDict, str, list of str] would be valid input.

        :raises TypeError:
            - Bad type passed as def_dicts.
            - A string or list of strings was passed without a hed_schema.
        """
        if not isinstance(def_dicts, list):
            def_dicts = [def_dicts]
        for def_dict in def_dicts:
            if isinstance(def_dict, (DefinitionDict, dict)):
                self._add_definitions_from_dict(def_dict)
            elif isinstance(def_dict, (str, list)):
                if not hed_schema:
                    # Still a TypeError (as before), but the message now names the actual problem.
                    raise TypeError("DefinitionDict requires a hed_schema to add definitions from strings")
                definitions = [def_dict] if isinstance(def_dict, str) else def_dict
                for definition in definitions:
                    # NOTE: the issues returned by check_for_definitions are not recorded here.
                    self.check_for_definitions(HedString(definition, hed_schema))
            else:
                raise TypeError(f"Invalid type '{type(def_dict)}' passed to DefinitionDict")

    def _add_definition(self, def_tag, def_value):
        """ Store one definition, recording an issue instead of overwriting an existing key.

        Parameters:
            def_tag (str): The key to store the definition under (used as given).
            def_value (DefinitionEntry): The entry to store.
        """
        if def_tag in self.defs:
            # Report against the context of the entry that is already stored.
            error_context = self.defs[def_tag].source_context
            self._issues += ErrorHandler.format_error_from_context(DefinitionErrors.DUPLICATE_DEFINITION,
                error_context=error_context, def_name=def_tag, actual_error=DefinitionErrors.DUPLICATE_DEFINITION)
        else:
            self.defs[def_tag] = def_value

    def _add_definitions_from_dict(self, def_dict):
        """ Add the definitions found in the given definition dictionary to this mapper.

        Parameters:
            def_dict (DefinitionDict or dict): DefDict whose definitions should be added.

        """
        for def_tag, def_value in def_dict.items():
            self._add_definition(def_tag, def_value)

    def get(self, def_name) -> Union[DefinitionEntry, None]:
        """ Get the definition entry for the definition name.

            Not case-sensitive

        Parameters:
            def_name (str):  Name of the definition to retrieve.

        Returns:
            Union[DefinitionEntry, None]:  Definition entry for the requested definition.
        """
        return self.defs.get(def_name.casefold())

    def __iter__(self):
        """ Iterate over the keys of the stored definitions. """
        return iter(self.defs)

    def __len__(self):
        """ Return the number of stored definitions. """
        return len(self.defs)

    def items(self):
        """ Return the (name, entry) pairs of the stored definitions.

            Alias for .defs.items()

        Returns:
            def_entries: A view of (str, DefinitionEntry) pairs, as returned by dict.items().
        """
        return self.defs.items()

    @property
    def issues(self):
        """Return issues about duplicate definitions."""
        return self._issues

    def check_for_definitions(self, hed_string_obj, error_handler=None) -> list[dict]:
        """ Check string for definition tags, adding them to self.

        Parameters:
            hed_string_obj (HedString): A single HED string to gather definitions from.
            error_handler (ErrorHandler or None): Error context used to identify where definitions are found.

        Returns:
            list[dict]:  List of issues encountered in checking for definitions. Each issue is a dictionary.
        """
        def_issues = []
        anchor_tags = {DefTagNames.DEFINITION_KEY}
        for definition_tag, group in hed_string_obj.find_top_level_tags(anchor_tags=anchor_tags):
            # Stage 1: shape of the definition group and of the definition name.
            group_tag, new_def_issues = self._find_group(definition_tag, group, error_handler)
            def_tag_name, def_takes_value = self._strip_value_placeholder(definition_tag.extension)

            if "/" in def_tag_name or "#" in def_tag_name:
                new_def_issues += ErrorHandler.format_error_with_context(error_handler,
                                                                         DefinitionErrors.INVALID_DEFINITION_EXTENSION,
                                                                         tag=definition_tag,
                                                                         def_name=def_tag_name)

            # Stage 2 (only if stage 1 was clean): the contents and the placeholders.
            if not new_def_issues:
                new_def_issues = self._validate_contents(definition_tag, group_tag, error_handler)
                new_def_issues += self._validate_placeholders(def_tag_name, group_tag, def_takes_value,
                                                              error_handler)

            # Stage 3 (only if stage 2 was clean): the name must not already be defined.
            if not new_def_issues:
                new_def_issues, context = self._validate_name_and_context(def_tag_name, error_handler)

            if new_def_issues:
                # A definition with any issue is reported and not stored.
                def_issues += new_def_issues
                continue

            self.defs[def_tag_name.casefold()] = DefinitionEntry(name=def_tag_name, contents=group_tag,
                                                                 takes_value=def_takes_value,
                                                                 source_context=context)

        return def_issues

    @staticmethod
    def _strip_value_placeholder(def_tag_name):
        """ Split a trailing "/#" off a definition name.

        Parameters:
            def_tag_name (str): The extension of the Definition tag.

        Returns:
            tuple:
                - str: The name with a trailing "/#" removed, if it had one.
                - bool: True if the name ended with "/#".
        """
        def_takes_value = def_tag_name.endswith("/#")
        if def_takes_value:
            def_tag_name = def_tag_name[:-len("/#")]
        return def_tag_name, def_takes_value

    def _validate_name_and_context(self, def_tag_name, error_handler):
        """ Check that the name is not already defined and fetch the current error context.

        Parameters:
            def_tag_name (str): The name of the definition without any Definition tag or value.
            error_handler (ErrorHandler or None): Error context used to identify where definitions are found.

        Returns:
            tuple:
                - list: A duplicate-definition issue if the casefolded name is already in self.defs, else empty.
                - list: The error handler's error_context, or an empty list if there is no error handler.
        """
        if error_handler:
            context = error_handler.error_context
        else:
            context = []
        new_def_issues = []
        if def_tag_name.casefold() in self.defs:
            new_def_issues += ErrorHandler.format_error_with_context(error_handler,
                                                                     DefinitionErrors.DUPLICATE_DEFINITION,
                                                                     def_name=def_tag_name)
        return new_def_issues, context

    @staticmethod
    def _validate_placeholders(def_tag_name, group, def_takes_value, error_handler):
        """ Check the definition for the correct placeholders (exactly 1 placeholder when takes value).

        Parameters:
            def_tag_name (str): The name of the definition without any Definition tag or value.
            group (HedGroup): The contents of the definition.
            def_takes_value (bool): True if the definition takes a value (should have #).
            error_handler (ErrorHandler or None): Error context used to identify where definitions are found.

        Returns:
            list:  List of issues encountered in checking for definitions. Each issue is a dictionary.
        """
        new_issues = []
        placeholder_tags = []
        tags_with_issues = []

        # Find the tags that have # in their strings and return issues of count > 1.
        if group:
            for tag in group.get_all_tags():
                count = str(tag).count("#")
                if count:
                    placeholder_tags.append(tag)
                if count > 1:
                    tags_with_issues.append(tag)

        if tags_with_issues:
            new_issues += ErrorHandler.format_error_with_context(error_handler,
                                                                 DefinitionErrors.WRONG_NUMBER_PLACEHOLDER_TAGS,
                                                                 def_name=def_tag_name,
                                                                 tag_list=tags_with_issues,
                                                                 expected_count=1 if def_takes_value else 0)
        # Make sure placeholder count is correct.
        if (len(placeholder_tags) == 1) != def_takes_value:
            new_issues += ErrorHandler.format_error_with_context(error_handler,
                                                                 DefinitionErrors.WRONG_NUMBER_PLACEHOLDER_TAGS,
                                                                 def_name=def_tag_name,
                                                                 tag_list=placeholder_tags,
                                                                 expected_count=1 if def_takes_value else 0)
            return new_issues

        # Make sure that the tag with the placeholder is allowed to take a value.
        if def_takes_value:
            placeholder_tag = placeholder_tags[0]
            if not placeholder_tag.is_takes_value_tag():
                new_issues += ErrorHandler.format_error_with_context(error_handler,
                                                                     DefinitionErrors.PLACEHOLDER_NO_TAKES_VALUE,
                                                                     def_name=def_tag_name,
                                                                     placeholder_tag=placeholder_tag)

        return new_issues

    @staticmethod
    def _find_group(definition_tag, group, error_handler):
        """ Find the contents group of a definition and check the number of groups and tags.

        Parameters:
            definition_tag (HedTag): The Definition tag itself.
            group (HedGroup): The entire definition group, including the Definition tag.
            error_handler (ErrorHandler or None): Error context used to identify where definitions are found.

        Returns:
            tuple:
                - HedGroup or None: The first group inside the definition group, or None if there is none.
                - list: List of issues encountered in checking the group. Each issue is a dictionary.
        """
        # initial validation
        groups = group.groups()
        issues = []
        if len(groups) > 1:
            issues += \
                ErrorHandler.format_error_with_context(error_handler,
                                                       DefinitionErrors.WRONG_NUMBER_GROUPS,
                                                       def_name=definition_tag.extension, tag_list=groups)
        elif len(groups) == 0 and '#' in definition_tag.extension:
            # A name containing a placeholder needs contents to hold it.
            issues += \
                ErrorHandler.format_error_with_context(error_handler,
                                                       DefinitionErrors.NO_DEFINITION_CONTENTS,
                                                       def_name=definition_tag.extension)
        tags = group.tags()
        if len(tags) != 1:
            # Any tag besides the Definition tag itself is reported as extra.
            issues += \
                ErrorHandler.format_error_with_context(error_handler,
                                                       DefinitionErrors.WRONG_NUMBER_TAGS,
                                                       def_name=definition_tag.extension,
                                                       tag_list=[tag for tag in tags
                                                                 if tag is not definition_tag])

        group_tag = groups[0] if groups else None

        return group_tag, issues

    @staticmethod
    def _validate_contents(definition_tag, group, error_handler):
        """ Check the definition contents for tags that may not appear inside a definition.

        Parameters:
            definition_tag (HedTag): The Definition tag itself.
            group (HedGroup or None): The contents of the definition; nothing is checked if this is falsy.
            error_handler (ErrorHandler or None): Error context used to identify where definitions are found.

        Returns:
            list:  List of issues encountered in checking the contents. Each issue is a dictionary.
        """
        issues = []
        if group:
            # Def, Def-expand and Definition tags may not be nested inside a definition.
            def_keys = {DefTagNames.DEF_KEY, DefTagNames.DEF_EXPAND_KEY, DefTagNames.DEFINITION_KEY}
            for def_tag in group.find_tags(def_keys, recursive=True, include_groups=0):
                issues += ErrorHandler.format_error_with_context(error_handler,
                                                                 DefinitionErrors.DEF_TAG_IN_DEFINITION,
                                                                 tag=def_tag,
                                                                 def_name=definition_tag.extension)

            # Tags carrying the Unique or Required attribute are reported as well.
            for tag in group.get_all_tags():
                if tag.has_attribute(HedKey.Unique) or tag.has_attribute(HedKey.Required):
                    issues += ErrorHandler.format_error_with_context(error_handler,
                                                                     DefinitionErrors.BAD_PROP_IN_DEFINITION,
                                                                     tag=tag,
                                                                     def_name=definition_tag.extension)

        return issues

    def get_definition_entry(self, def_tag):
        """ Get the entry for a given def tag.

            Does not validate at all.

        Parameters:
            def_tag (HedTag): Source HED tag that may be a Def or Def-expand tag.

        Returns:
            def_entry(DefinitionEntry or None): The definition entry if it exists
        """
        # Only the part of the extension before the first '/' names the definition.
        tag_label = def_tag.extension.partition('/')[0]
        return self.defs.get(tag_label.casefold())

    def _get_definition_contents(self, def_tag):
        """ Get the contents for a given def tag.

            Does not validate at all.

        Parameters:
            def_tag (HedTag): Source HED tag that may be a Def or Def-expand tag.

        Returns:
            def_contents: The result of the entry's get_definition() (the contents to replace the
                          previous def-tag with), or None if no such definition is stored.
        """
        def_entry = self.get_definition_entry(def_tag)
        if def_entry is None:
            # Could raise an error here?
            return None

        # Everything after the first '/' is handed on as the placeholder value ('' if there is no '/').
        placeholder = def_tag.extension.partition('/')[2]
        return def_entry.get_definition(def_tag, placeholder_value=placeholder)

    @staticmethod
    def get_as_strings(def_dict) -> dict[str, str]:
        """ Convert the entries to strings of the contents.

        Parameters:
            def_dict (dict or DefinitionDict): A dict of definitions, or a DefinitionDict whose .defs is used.

        Returns:
            dict[str,str]: definition name and contents
        """
        if isinstance(def_dict, DefinitionDict):
            def_dict = def_dict.defs

        return {key: str(value.contents) for key, value in def_dict.items()}

issues property

issues

Return issues about duplicate definitions.

add_definitions

add_definitions(def_dicts, hed_schema=None)

Add definitions from dict(s) or string(s) to this dict.

Parameters:

Name Type Description Default
def_dicts list, DefinitionDict, dict, or str

DefinitionDict or list of DefinitionDicts/strings/dicts whose definitions should be added.

required
hed_schema HedSchema or None

Required if passing strings or lists of strings, unused otherwise.

None

Notes:

- The dict form expects DefinitionEntries in the same form as a DefinitionDict.
- A str or list of strings will be parsed using the hed_schema.
- You can mix and match types, e.g. [DefinitionDict, str, list of str] would be valid input.

:raises TypeError: - Bad type passed as def_dicts.

Source code in hed/models/definition_dict.py
def add_definitions(self, def_dicts, hed_schema=None):
    """ Add definitions from dict(s) or string(s) to this dict.

    Parameters:
        def_dicts (list, DefinitionDict, dict, or str): DefinitionDict or list of DefinitionDicts/strings/dicts
                                                        whose definitions should be added.
        hed_schema (HedSchema or None): Required if passing strings or lists of strings, unused otherwise.

    Notes:
        - dict form expects DefinitionEntries in the same form as a DefinitionDict.
        - str or list of strings will parse the strings using the hed_schema.
        - You can mix and match types, e.g. [DefinitionDict, str, list of str] would be valid input.

    :raises TypeError:
        - Bad type passed as def_dicts.
        - A string or list of strings was passed without a hed_schema.
    """
    if not isinstance(def_dicts, list):
        def_dicts = [def_dicts]
    for def_dict in def_dicts:
        if isinstance(def_dict, (DefinitionDict, dict)):
            self._add_definitions_from_dict(def_dict)
        elif isinstance(def_dict, (str, list)):
            if not hed_schema:
                # Still a TypeError (as before), but the message now names the actual problem.
                raise TypeError("DefinitionDict requires a hed_schema to add definitions from strings")
            definitions = [def_dict] if isinstance(def_dict, str) else def_dict
            for definition in definitions:
                # NOTE: the issues returned by check_for_definitions are not recorded here.
                self.check_for_definitions(HedString(definition, hed_schema))
        else:
            raise TypeError(f"Invalid type '{type(def_dict)}' passed to DefinitionDict")

check_for_definitions

check_for_definitions(
    hed_string_obj, error_handler=None
) -> list[dict]

Check string for definition tags, adding them to self.

Parameters:

Name Type Description Default
hed_string_obj HedString

A single HED string to gather definitions from.

required
error_handler ErrorHandler or None

Error context used to identify where definitions are found.

None

Returns:

Type Description
list[dict]

list[dict]: List of issues encountered in checking for definitions. Each issue is a dictionary.

Source code in hed/models/definition_dict.py
def check_for_definitions(self, hed_string_obj, error_handler=None) -> list[dict]:
    """ Check string for definition tags, adding them to self.

    Each top-level Definition group goes through three validation stages; a later stage only runs
    if the earlier ones found nothing, and a definition with any issue is reported but not stored.

    Parameters:
        hed_string_obj (HedString): A single HED string to gather definitions from.
        error_handler (ErrorHandler or None): Error context used to identify where definitions are found.

    Returns:
        list[dict]:  List of issues encountered in checking for definitions. Each issue is a dictionary.
    """
    collected = []
    anchors = {DefTagNames.DEFINITION_KEY}
    for definition_tag, group in hed_string_obj.find_top_level_tags(anchor_tags=anchors):
        # Stage 1: shape of the definition group and of the definition name.
        contents, problems = self._find_group(definition_tag, group, error_handler)
        name, takes_value = self._strip_value_placeholder(definition_tag.extension)
        if "/" in name or "#" in name:
            problems += ErrorHandler.format_error_with_context(error_handler,
                                                               DefinitionErrors.INVALID_DEFINITION_EXTENSION,
                                                               tag=definition_tag,
                                                               def_name=name)

        # Stage 2: the contents of the definition and its placeholders.
        if not problems:
            problems = self._validate_contents(definition_tag, contents, error_handler)
            problems += self._validate_placeholders(name, contents, takes_value, error_handler)

        # Stage 3: the name must not be defined already; also yields the source context.
        if not problems:
            problems, context = self._validate_name_and_context(name, error_handler)

        if problems:
            collected += problems
        else:
            self.defs[name.casefold()] = DefinitionEntry(name=name, contents=contents,
                                                         takes_value=takes_value,
                                                         source_context=context)

    return collected

get

get(def_name) -> Union[DefinitionEntry, None]

Get the definition entry for the definition name.

Not case-sensitive

Parameters:

Name Type Description Default
def_name str

Name of the definition to retrieve.

required

Returns:

Type Description
Union[DefinitionEntry, None]

Union[DefinitionEntry, None]: Definition entry for the requested definition.

Source code in hed/models/definition_dict.py
def get(self, def_name) -> Union[DefinitionEntry, None]:
    """ Look up the definition entry stored for a definition name.

        The name is casefolded before the lookup, so the match is not case-sensitive.

    Parameters:
        def_name (str):  Name of the definition to retrieve.

    Returns:
        Union[DefinitionEntry, None]:  The matching definition entry, or None if there is none.
    """
    lookup_key = def_name.casefold()
    return self.defs.get(lookup_key)

get_as_strings staticmethod

get_as_strings(def_dict) -> dict[str, str]

Convert the entries to strings of the contents

Parameters:

Name Type Description Default
def_dict dict

A dict of definitions

required

Returns:

Type Description
dict[str, str]

dict[str,str]: definition name and contents

Source code in hed/models/definition_dict.py
@staticmethod
def get_as_strings(def_dict) -> dict[str, str]:
    """ Convert the entries to strings of the contents.

    Parameters:
        def_dict (dict or DefinitionDict): A dict of definitions, or a DefinitionDict whose .defs is used.

    Returns:
        dict[str,str]: definition name and contents
    """
    entries = def_dict.defs if isinstance(def_dict, DefinitionDict) else def_dict
    return {name: str(entry.contents) for name, entry in entries.items()}

get_definition_entry

get_definition_entry(def_tag)

Get the entry for a given def tag.

Does not validate at all.

Parameters:

Name Type Description Default
def_tag HedTag

Source HED tag that may be a Def or Def-expand tag.

required

Returns:

Name Type Description
def_entry DefinitionEntry or None

The definition entry if it exists

Source code in hed/models/definition_dict.py
def get_definition_entry(self, def_tag):
    """ Get the entry for a given def tag.

        Does not validate at all.

    Parameters:
        def_tag (HedTag): Source HED tag that may be a Def or Def-expand tag.

    Returns:
        def_entry(DefinitionEntry or None): The definition entry if it exists
    """
    # Only the part of the extension before the first '/' names the definition;
    # anything after it (the placeholder value) plays no part in the lookup.
    tag_label = def_tag.extension.partition('/')[0]
    return self.defs.get(tag_label.casefold())

items

items()

Return the dictionary of definitions.

Alias for .defs.items()

Returns:

Name Type Description
def_entries {str: DefinitionEntry}

A view of the (name, DefinitionEntry) pairs of the stored definitions.

Source code in hed/models/definition_dict.py
def items(self):
    """ Return the (name, entry) pairs of the stored definitions.

        Alias for .defs.items()

    Returns:
        def_entries: The (str, DefinitionEntry) pairs, exactly as returned by self.defs.items().
    """
    definitions = self.defs
    return definitions.items()