Source code for hed.tools.analysis.events_summary

"""Summarizes events in a tabular file by checking tag coverage and quality."""

from hed import TabularInput
from hed.errors import ErrorHandler
from hed.errors.error_types import TagQualityErrors
from hed.tools.analysis.event_checker import EventsChecker


class EventsSummary:
    """Summarizes HED event annotations for a tabular file, grouping tags by stimulus/response categories.

    Attributes:
        input_data: The TabularInput built from the file and sidecar.
        fatal_errors (bool): True if validating the input produced errors; no summary is available then.
        checker: The EventsChecker for the input, or None when fatal_errors is True.
        issues (list): The issues returned by the checker's validate_event_tags (empty on fatal errors).
        error_lines (dict): The result of EventsChecker.get_error_lines for those issues (empty on fatal errors).
    """

    # Excluding tags for condition-variables and task -- these can be done separately if we want to.
    REMOVE_TYPES = ["Condition-variable", "Task"]

    # Each row's tags are filed under the first of these types (in this order) found among its tags.
    MATCH_TYPES = [
        "Experimental-stimulus",
        "Participant-response",
        "Cue",
        "Feedback",
        "Instructional",
        "Sensory-event",
        "Agent-action",
    ]

    # If a tag has any of these as a parent, it is excluded
    EXCLUDED_PARENTS = {
        "data-marker",
        "data-resolution",
        "quantitative-value",
        "spatiotemporal-value",
        "statistical-value",
        "informational-property",
        "organizational-property",
        "grayscale",
        "hsv-color",
        "rgb-color",
        "luminance",
        "luminance-contrast",
        "opacity",
        "task-effect-evidence",
        "task-relationship",
        "relation",
    }

    # If a tag has any of these as a parent, it is replaced by this parent only
    CUTOFF_TAGS = {
        "blue-color",
        "brown-color",
        "cyan-color",
        "gray-color",
        "green-color",
        "orange-color",
        "pink-color",
        "purple-color",
        "red-color",
        "white-color",
        "yellow-color",
        "visual-presentation",
    }

    # These tags are removed at the end as non-informational
    FILTERED_TAGS = {
        "event",
        "agent",
        "action",
        "move-body-part",
        "item",
        "biological-item",
        "anatomical-item",
        "body-part",
        "lower-extremity-part",
        "upper-extremity-part",
        "head-part",
        "torso-part",
        "face-part",
        "language-item",
        "object",
        "geometric-object",
        "man-made-object",
        "device",
        "computing-device",
        "io-device",
        "input-device",
        "output-device",
        "auditory-device",
        "display-device",
        "recording-device",
        "natural-object",
        "document",
        "media",
        "media-clip",
        "visualization",
        "property",
        "agent-property",
        "agent-state",
        "agent-cognitive-state",
        "agent-emotional-state",
        "agent-physiological-state",
        "agent-postural-state",
        "agent-task-role",
        "agent-trait",
        "data-property",
        "biological-artifact",
        "nonbiological-artifact",
        "spatial-property",
        "temporal-property",
        "spectral-property",
        "data-source-type",  # was misspelled "dara-source-type", so it never matched
        "data-value",
        "categorical-value",
        "categorical-class-value",
        "categorical-judgment-value",
        "categorical-level-value",
        "categorical-location-value",
        "categorical-orientation-value",
        "physical-value",
        "data-variability-attribute",
        "environmental-property",
        "sensory-property",
        "sensory-attribute",
        "auditory-attribute",
        "gustatory-attribute",
        "olfactory-attribute",
        "tactile-attribute",
        "visual-attribute",
        "sensory-presentation",
        "task-property",
        "task-action-type",
        "task-attentional-demand",
        "task-event-role",
        "task-stimulus-role",
    }

    def __init__(self, hed_schema, file, sidecar=None, name=None):
        """Constructor for the EventsSummary class.

        Parameters:
            hed_schema: The HED schema used to validate the input and check its event tags.
            file: The tabular events file, passed through to TabularInput.
            sidecar: The sidecar passed through to TabularInput (optional).
            name: The name passed through to TabularInput and EventsChecker (optional).
        """
        self.input_data = None
        self.checker = None
        self.fatal_errors = False
        # Defaults so the attributes exist even when validation fails and
        # _initialize returns before the checker is created.
        self.issues = []
        self.error_lines = {}
        self._initialize(hed_schema, file, sidecar, name)

    def _initialize(self, hed_schema, file, sidecar, name):
        """Load and validate the input, then run the event-tag checks.

        If validation reports any errors, fatal_errors is set and the checker is not created.

        Parameters:
            hed_schema: The HED schema to validate against.
            file: The tabular events file.
            sidecar: The sidecar for the file, or None.
            name: The name associated with the input, or None.
        """
        self.input_data = TabularInput(file, sidecar, name)
        # Warnings are switched off in the handler, so any reported issue is treated as fatal.
        errors = self.input_data.validate(hed_schema, error_handler=ErrorHandler(check_for_warnings=False))
        if errors:
            self.fatal_errors = True
            return
        self.checker = EventsChecker(hed_schema, self.input_data, name)
        self.issues = self.checker.validate_event_tags()
        self.error_lines = EventsChecker.get_error_lines(self.issues)

    def extract_tag_summary(self):
        """Extract a summary of the tags in the tabular input file.

        Each row's tags are added to the first MATCH_TYPES category found among them, or to the
        "other" collection if none is found. Empty rows and rows flagged with improper event
        groups are skipped. FILTERED_TAGS are removed from every result.

        Returns:
            tuple[dict, list]:
            - dict: Maps each entry of MATCH_TYPES to a sorted list of tag terms.
            - list: A sorted list of tag terms from rows that matched none of the MATCH_TYPES.

            If the input had fatal validation errors (no checker), every list is empty.
        """
        if self.checker is None:
            return {key: [] for key in self.MATCH_TYPES}, []

        group_dict = {key: set() for key in self.MATCH_TYPES}
        other = set()
        # Built once as a set so the per-row membership test is not a list scan.
        # NOTE(review): assumes the row identifiers from get_error_lines are hashable -- confirm.
        group_error_lines = set(self.error_lines.get(TagQualityErrors.IMPROPER_EVENT_GROUPS, []))

        for index, hed_obj in enumerate(self.checker.hed_objs):
            if not hed_obj or index in group_error_lines:
                continue
            all_tags = hed_obj.get_all_tags()
            # Only the first matching category (in MATCH_TYPES order) receives the row's tags.
            matched_key = next((key for key in group_dict if self.match_tags(all_tags, key)), None)
            if matched_key is None:
                other = self.update_tags(other, all_tags)
            else:
                group_dict[matched_key] = self.update_tags(group_dict[matched_key], all_tags)

        summary = {key: sorted(tags - self.FILTERED_TAGS) for key, tags in group_dict.items()}
        return summary, sorted(other - self.FILTERED_TAGS)

    @staticmethod
    def match_tags(all_tags, key):
        """Return True if any tag in all_tags has a short_base_tag matching key.

        Parameters:
            all_tags (list[HedTag]): The tags to search.
            key (str): The short base tag name to look for.

        Returns:
            bool: True if a match is found.
        """
        return any(tag.short_base_tag == key for tag in all_tags)

    def update_tags(self, tag_set, all_tags):
        """Add the terms of each tag in all_tags to tag_set, respecting excluded and cutoff parents.

        For each tag, its tag_terms are compared against the class-level sets:
        - if any term is in EXCLUDED_PARENTS, the tag contributes nothing;
        - otherwise, if a term is in CUTOFF_TAGS, only the first such term is added;
        - otherwise every term of the tag is added.

        Parameters:
            tag_set (set): The running set of tag terms to update (modified in place).
            all_tags (list[HedTag]): Tags to process.

        Returns:
            set: The updated tag_set (the same object that was passed in).
        """
        for tag in all_tags:
            terms = tag.tag_terms
            if any(term in self.EXCLUDED_PARENTS for term in terms):
                continue
            cutoff = next((term for term in terms if term in self.CUTOFF_TAGS), None)
            if cutoff is not None:
                tag_set.add(cutoff)
            else:
                tag_set.update(terms)
        return tag_set


def summarize_tags(schema, tsv, sidecar=None, name=None):
    """Summarize the tags in a given tabular input file.

    Parameters:
        schema: The HED schema to use for validation.
        tsv: The path to the input file.
        sidecar: The path to the sidecar file (optional).
        name: The name of the dataset (optional).

    Returns:
        dict or None: The first element returned by EventsSummary.extract_tag_summary (the
        summary dictionary), or None if the input had fatal validation errors. The second
        element of that result (the unmatched tags) is not returned.
    """
    events_summary = EventsSummary(schema, tsv, sidecar, name)
    if events_summary.fatal_errors:
        return None
    # Only the per-category summary is exposed; the unmatched tags are discarded.
    summary, _ = events_summary.extract_tag_summary()
    return summary