Skip to content

Schema

HED schema management, loading, and validation functionality.

HedSchema

HedSchema

Bases: HedSchemaBase

A HED schema suitable for processing.

Source code in hed/schema/hed_schema.py
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
class HedSchema(HedSchemaBase):
    """ A HED schema suitable for processing. """

    def __init__(self):
        """ Create an empty schema: no header attributes, empty text blocks and empty sections.

            A HedSchema can be used for validation, checking tag attributes, parsing tags, etc.
        """
        super().__init__()

        # Where the schema came from.
        self.filename = None
        self.source_format = None  # The type of file this was loaded from (mediawiki, xml, or owl - None if mixed)

        # Header data and the free text that surrounds the schema body.
        self.header_attributes = {}
        self.prologue = ""
        self.epilogue = ""

        # Any additional data that might be needed for serialization (like OWL or other formats).
        self.extras = {}

        # The specified library namespace prefix - tags will be {schema_namespace}:{tag_name}
        self._namespace = ""

        # Empty sections, keyed by HedSectionKey.
        self._sections = self._create_empty_sections()

    # ===============================================
    # Basic schema properties
    # ===============================================
    @property
    def version_number(self) -> str:
        """ The bare version value stored in the schema header.

        Returns:
            str: The 'version' header attribute, without namespace or library name.

        Raises:
            KeyError: If the header attributes contain no 'version' entry.
        """
        header = self.header_attributes
        return header['version']

    @property
    def version(self) -> str:
        """ The complete schema version, including namespace and library name (if applicable).

        The library and version header values are each split on commas and paired up in order.
        Each pair is rendered as "{namespace}{library}_{version}", or "{namespace}{version}"
        when the library name is empty.

        Returns:
            str: The comma-joined complete versions.
        """
        library_names = self.library.split(",")
        version_numbers = self.version_number.split(",")
        prefix = self._namespace

        full_versions = []
        for library_name, number in zip(library_names, version_numbers):
            if library_name:
                full_versions.append(f"{prefix}{library_name}_{number}")
            else:
                full_versions.append(f"{prefix}{number}")

        return ",".join(full_versions)

    @property
    def library(self) -> str:
        """ The library name recorded in the schema header.

        Returns:
            str: The library header attribute, or an empty string if the header has none.
        """
        header = self.header_attributes
        return header.get(LIBRARY_ATTRIBUTE, "")

    @property
    def schema_namespace(self) -> str:
        """ The namespace prefix currently assigned to this schema.

        Returns:
            str: The namespace prefix (an empty string when none has been set).
        """
        namespace = self._namespace
        return namespace

    def can_save(self) -> bool:
        """ Report whether this schema may legally be saved.

        You cannot save schemas loaded as merged from multiple library schemas; those are
        recognized by a comma in the library name.

        Returns:
            bool: True if there is no library name or the library name contains no comma.
        """
        library_names = self.library
        if not library_names:
            return True
        return "," not in library_names

    @property
    def with_standard(self) -> str:
        """ The version of the base schema this one extends, as recorded in the header.

        Returns:
            str: The withStandard header value, or an empty string if it is absent.
        """
        header = self.header_attributes
        return header.get(WITH_STANDARD_ATTRIBUTE, "")

    @property
    def merged(self) -> bool:
        """ Report whether this schema was loaded from a merged file.

        Returns:
            bool: True when the header has no (or an empty) unmerged attribute.
        """
        unmerged_flag = self.header_attributes.get(UNMERGED_ATTRIBUTE, "")
        return not unmerged_flag

    @property
    def tags(self) -> "HedSchemaTagSection":
        """ The section of this schema holding the tags.

        Returns:
            HedSchemaTagSection: The section stored under HedSectionKey.Tags.
        """
        sections = self._sections
        return sections[HedSectionKey.Tags]

    @property
    def unit_classes(self) -> "HedSchemaUnitClassSection":
        """ The section of this schema holding the unit classes.

        Returns:
            HedSchemaUnitClassSection: The section stored under HedSectionKey.UnitClasses.
        """
        sections = self._sections
        return sections[HedSectionKey.UnitClasses]

    @property
    def units(self) -> "HedSchemaUnitSection":
        """ The section of this schema holding the units.

        Returns:
            HedSchemaUnitSection: The section stored under HedSectionKey.Units.
        """
        sections = self._sections
        return sections[HedSectionKey.Units]

    @property
    def unit_modifiers(self) -> "HedSchemaSection":
        """ The section of this schema holding the unit modifiers.

        Returns:
            HedSchemaSection: The section stored under HedSectionKey.UnitModifiers.
        """
        sections = self._sections
        return sections[HedSectionKey.UnitModifiers]

    @property
    def value_classes(self) -> "HedSchemaSection":
        """ The section of this schema holding the value classes.

        Returns:
            HedSchemaSection: The section stored under HedSectionKey.ValueClasses.
        """
        sections = self._sections
        return sections[HedSectionKey.ValueClasses]

    @property
    def attributes(self) -> "HedSchemaSection":
        """ The section of this schema holding the attributes.

        Returns:
            HedSchemaSection: The section stored under HedSectionKey.Attributes.
        """
        sections = self._sections
        return sections[HedSectionKey.Attributes]

    @property
    def properties(self) -> "HedSchemaSection":
        """ The section of this schema holding the properties.

        Returns:
            HedSchemaSection: The section stored under HedSectionKey.Properties.
        """
        sections = self._sections
        return sections[HedSectionKey.Properties]

    def get_schema_versions(self) -> list[str]:
        """ The formatted version of this schema, wrapped in a list.

        Returns:
            list[str]: A single-element list holding the result of get_formatted_version().
        """
        formatted_version = self.get_formatted_version()
        return [formatted_version]

    def get_formatted_version(self) -> str:
        """ The complete version of this schema (namespace and library name included) as JSON.

        Returns:
            str: The `version` property serialized with json.dumps, i.e. a quoted JSON string.
        """
        complete_version = self.version
        return json.dumps(complete_version)

    def get_save_header_attributes(self, save_merged: bool = False) -> dict:
        """ Build the header attributes in the order they should be written out.

        Attributes are sorted by name, except that the version attribute is forced to the front.
        The unmerged attribute is always removed; when not saving merged it is re-added
        as the last entry with the value "True".

        Parameters:
            save_merged (bool): Whether to save as merged schema.

        Returns:
            dict: A new, ordered header attributes dictionary (the stored one is not modified).
        """
        version_first = "!!!!!!!!!!!!!!"  # Sorts ahead of ordinary attribute names

        def _sort_key(item):
            attribute_name = item[0]
            if attribute_name == VERSION_ATTRIBUTE:
                return version_first
            return attribute_name

        ordered_attributes = dict(sorted(self.header_attributes.items(), key=_sort_key))

        # Drop any existing unmerged marker; an unmerged save appends it again so it ends up last.
        ordered_attributes.pop(UNMERGED_ATTRIBUTE, None)
        if not save_merged:
            ordered_attributes[UNMERGED_ATTRIBUTE] = "True"

        return ordered_attributes

    def schema_for_namespace(self, namespace: str) -> Union["HedSchema", None]:
        """ Return this schema if it uses the given namespace.

        Parameters:
            namespace (str): The schema library name namespace.

        Returns:
            HedSchema or None: This schema when namespace equals its own namespace, otherwise None.
        """
        return self if self._namespace == namespace else None

    @property
    def valid_prefixes(self) -> list[str]:
        """ The tag prefixes this schema will accept.

        Returns:
            list[str]: A list containing only this schema's namespace.

        Notes:
            - The return value is always length 1 if using a HedSchema.
        """
        own_namespace = self._namespace
        return [own_namespace]

    def get_extras(self, extras_key) -> Union[DataFrame, None]:
        """ Look up the extra data stored under the given key.

        Parameters:
            extras_key (str): The key to check for in the extras dictionary.

        Returns:
            Union[DataFrame, None]: The stored DataFrame, or None if this object has no extras,
                                    the key is missing, or the DataFrame is empty.
        """
        if not hasattr(self, 'extras'):
            return None
        if extras_key not in self.extras:
            return None
        extra_frame = self.extras[extras_key]
        return None if extra_frame.empty else extra_frame

    # ===============================================
    # Creation and saving functions
    # ===============================================

    # todo: we may want to collapse these 6 functions into one like this
    # def serialize(self, filename=None, save_merged=False, file_format=whatever is default):
    #     pass

    def get_as_mediawiki_string(self, save_merged=False) -> str:
        """ Serialize the schema to a single mediawiki string.

        Parameters:
            save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
                                If it is not a "withStandard" schema, this setting has no effect.

        Returns:
            str: The lines produced by Schema2Wiki, joined with newlines.

        """
        wiki_lines = Schema2Wiki().process_schema(self, save_merged)
        newline = '\n'
        return newline.join(wiki_lines)

    def get_as_xml_string(self, save_merged=True) -> str:
        """ Serialize the schema to an XML string.

        Parameters:
            save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
                                If it is not a "withStandard" schema, this setting has no effect.

        Returns:
            str: The XML tree produced by Schema2XML, converted to a string.

        """
        converter = Schema2XML()
        root_element = converter.process_schema(self, save_merged)
        return schema_util.xml_element_2_str(root_element)

    def get_as_dataframes(self, save_merged=False) -> dict[str, DataFrame]:
        """ Get a dict of dataframes representing this schema.

        Parameters:
            save_merged (bool): If True, returns DFs as if merged with standard.

        Returns:
            dict[str, DataFrame]: The dataframes produced by Schema2DF, keyed by name
                                  (string keys assumed - confirm against Schema2DF).
                                  They can be loaded back as a schema.
        """
        return Schema2DF().process_schema(self, save_merged)

    def save_as_mediawiki(self, filename, save_merged=False):
        """ Write the schema to a file in mediawiki format, one output string per line.

        Parameters:
            filename (str): Save location.
            save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
                                If it is not a "withStandard" schema, this setting has no effect.

        Raises:
            OSError: File cannot be saved for some reason.
        """
        # Conversion happens before the file is opened.
        wiki_lines = Schema2Wiki().process_schema(self, save_merged)
        with open(filename, mode='w', encoding='utf-8') as wiki_file:
            wiki_file.writelines(line + '\n' for line in wiki_lines)

    def save_as_xml(self, filename, save_merged=True):
        """ Save as XML to a file.

        Parameters:
            filename (str): Save location.
            save_merged (bool): If true, this will save the schema as a merged schema if it is a "withStandard" schema.
                                If it is not a "withStandard" schema, this setting has no effect.

        Raises:
            OSError: File cannot be saved for some reason.
        """
        xml_tree = Schema2XML().process_schema(self, save_merged)
        # Build the full string before opening the file: opening in 'w' mode truncates the target,
        # so a serialization failure must not be able to leave an empty or partial file behind.
        xml_string = schema_util.xml_element_2_str(xml_tree)
        with open(filename, mode='w', encoding='utf-8') as opened_file:
            opened_file.write(xml_string)

    def save_as_dataframes(self, base_filename, save_merged=False):
        """ Write the schema out as a set of dataframe files.

            If base_filename has a .tsv suffix, save directly to the indicated location.
            If base_filename is a directory(does NOT have a .tsv suffix), save the contents into a directory named that.
            The subfiles are named the same.  e.g. HED8.3.0/HED8.3.0_Tag.tsv

            Any non-empty extras stored on this schema are added to (and may replace entries of)
            the dataframes before saving.

        Parameters:
            base_filename (str): Save filename. A suffix will be added to most, e.g. _Tag
            save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
                                If it is not a "withStandard" schema, this setting has no effect.

        Raises:
            OSError: File cannot be saved for some reason.
        """
        frames = Schema2DF().process_schema(self, save_merged)
        extra_frames = getattr(self, 'extras', None)
        if extra_frames:
            frames.update(extra_frames)
        df_util.save_dataframes(base_filename, frames)

    def set_schema_prefix(self, schema_namespace):
        """ Set the library namespace associated with this schema.

        Parameters:
            schema_namespace (str): Should be empty, or end with a colon (a colon is added automatically if missing).

        :raises HedFileError:
            - The part of the namespace before the final colon is not purely alphabetic
        """
        if schema_namespace:
            if not schema_namespace.endswith(":"):
                schema_namespace += ":"

            # Everything before the trailing colon must be alphabetic.
            if not schema_namespace[:-1].isalpha():
                raise HedFileError(HedExceptions.INVALID_LIBRARY_PREFIX,
                                   "Schema namespace must contain only alpha characters",
                                   self.filename)

        self._namespace = schema_namespace

    def __eq__(self, other):
        """ Return True if these schema match exactly.

        Parameters:
            other (HedSchema): The schema to be compared.

        Returns:
            bool: True if other exactly matches this schema. False if other is None.
                  NotImplemented if other is not a HedSchema, so Python can fall back to its
                  default comparison instead of this method raising AttributeError.

        Notes:
            - Matches must include attributes, tag names, etc.
            - Compared in order: saved header attributes, duplicate status, stripped prologue,
              stripped epilogue, sections, and namespace.

        """
        if other is None:
            return False
        if not isinstance(other, HedSchema):
            # Objects of other types have none of the attributes compared below.
            return NotImplemented
        if self.get_save_header_attributes() != other.get_save_header_attributes():
            # print(f"Header attributes not equal: '{self.get_save_header_attributes()}' vs '{other.get_save_header_attributes()}'")
            return False
        if self.has_duplicates() != other.has_duplicates():
            # print(f"Duplicates: '{self.has_duplicates()}' vs '{other.has_duplicates()}'")
            return False
        if self.prologue.strip() != other.prologue.strip():
            # print(f"PROLOGUE NOT EQUAL: '{self.prologue.strip()}' vs '{other.prologue.strip()}'")
            return False
        if self.epilogue.strip() != other.epilogue.strip():
            # print(f"EPILOGUE NOT EQUAL: '{self.epilogue.strip()}' vs '{other.epilogue.strip()}'")
            return False
        if self._sections != other._sections:
            # This block is useful for debugging when modifying the schema class itself.
            # for section1, section2 in zip(self._sections.values(), other._sections.values()):
            #     if section1 != section2:
            #         dict1 = section1.all_names
            #         dict2 = section2.all_names
            #         if dict1 != dict2:
            #             print(f"DICT {section1._section_key} NOT EQUAL")
            #             key_union = set(list(dict1.keys()) + list(dict2.keys()))
            #             for key in key_union:
            #                 if key not in dict1:
            #                     print(f"{key} not in dict1")
            #                     continue
            #                 if key not in dict2:
            #                     print(f"{key} not in dict2")
            #                     continue
            #                 if dict1[key] != dict2[key]:
            #                     s = f"{key} unmatched: '{str(dict1[key].name)}' vs '{str(dict2[key].name)}'"
            #                     print(s)
            return False
        if self._namespace != other._namespace:
            # print(f"NAMESPACE NOT EQUAL: '{self._namespace}' vs '{other._namespace}'")
            return False
        return True

    def __getitem__(self, section_key):
        """ Return the schema section stored under section_key.

        Parameters:
            section_key (HedSectionKey): The key of the section to retrieve.

        Returns:
            The section object stored for that key.

        Raises:
            KeyError: If there is no section for section_key.
        """
        sections = self._sections
        return sections[section_key]

    def check_compliance(self, check_for_warnings=True, name=None, error_handler=None) -> list[dict]:
        """ Check this schema for HED3 compliance.

        Delegates to hed.schema.schema_compliance.check_compliance.

        Parameters:
            check_for_warnings (bool): If True, checks for formatting issues like invalid characters, capitalization.
            name (str): If present, use as the filename for context, rather than using the actual filename.
                        Useful for temp filenames when supporting web services.
            error_handler (ErrorHandler or None): Used to report errors.  Uses a default one if none passed in.

        Returns:
            list[dict]: A list of all warnings and errors found in the file. Each issue is a dictionary.
        """
        # Imported here rather than at module level (presumably to avoid a circular import - confirm).
        from hed.schema import schema_compliance
        issues = schema_compliance.check_compliance(self, check_for_warnings, name, error_handler)
        return issues

    def get_tags_with_attribute(self, attribute, key_class=HedSectionKey.Tags) -> list[str]:
        """ Return the names of the entries that have the given attribute.

        Parameters:
            attribute (str): A tag attribute.  Eg HedKey.ExtensionAllowed
            key_class (HedSectionKey): The HedSectionKey for the section to retrieve from.

        Returns:
            list[str]: The names of all entries in the section with this attribute. The section is
                       asked for names only (return_name_only=True) and is given this schema's namespace.

        Notes:
            - The result is cached so will be fast after first call.
        """
        section = self._sections[key_class]
        return section.get_entries_with_attribute(attribute, return_name_only=True,
                                                  schema_namespace=self._namespace)

    def get_tag_entry(self, name: str, key_class=HedSectionKey.Tags, schema_namespace: str = "") -> Union["HedSchemaEntry", None]:
        """ Look up the schema entry with the given name, if one exists.

        Parameters:
            name (str): Any form of basic tag(or other section entry) to look up.
                This will not handle extensions or similar.
                If this is a tag, it can have a schema namespace, but it's not required
            key_class (HedSectionKey or str):  The type of entry to return.
            schema_namespace (str): Only used on Tags.  If incorrect, will return None.

        Returns:
            HedSchemaEntry or None: The schema entry for the given tag, or None if not found.
        """
        if key_class == HedSectionKey.Tags:
            own_namespace = self._namespace
            if schema_namespace != own_namespace:
                return None
            # Strip this schema's namespace from the front of the name before the lookup.
            if name.startswith(own_namespace):
                name = name[len(own_namespace):]

        return self._get_tag_entry(name, key_class)

    def find_tag_entry(self, tag, schema_namespace="") -> tuple[Union["HedTagEntry", None], Union[str, None], list[dict]]:
        """ Find the schema entry for a given source tag, after checking the namespace.

        Parameters:
            tag (str, HedTag):     Any form of tag to look up.  Can have an extension, value, etc.
            schema_namespace (str):  The schema namespace of the tag, if any.

        Returns:
            tuple[Union["HedTagEntry", None], Union[str, None], list[dict]]:
            - The located tag entry for this tag.
            - The remainder of the tag that isn't part of the base tag.
            - A list of errors while converting.

        Notes:
            - If schema_namespace differs from this schema's namespace, the result is
              (None, None, issues) where issues is a HED_LIBRARY_UNMATCHED error.
            - Works left to right (which is mostly relevant for errors).

        """
        if schema_namespace == self._namespace:
            return self._find_tag_entry(tag, schema_namespace)

        namespace_issues = ErrorHandler.format_error(ValidationErrors.HED_LIBRARY_UNMATCHED, tag,
                                                     schema_namespace, self.valid_prefixes)
        return None, None, namespace_issues


    # ===============================================
    # Private utility functions for getting/finding tags
    # ===============================================
    def _get_tag_entry(self, name, key_class=HedSectionKey.Tags):
        """ Look up a name directly in one section, with no namespace handling.

        Parameters:
            name (str): Any form of basic tag(or other section entry) to look up.
                This will not handle extensions or similar.
            key_class (HedSectionKey or str):  The type of entry to return.

        Returns:
            HedSchemaEntry: The result of the section's get() for this name.

        """
        section = self._sections[key_class]
        return section.get(name)

    def _find_tag_entry(self, tag, schema_namespace="") -> tuple[Union["HedTagEntry", None], Union[str, None], list[dict]]:
        """ Find the schema entry for a given source tag (namespace already verified by the caller).

        Parameters:
            tag (str, HedTag):     Any form of tag to look up.  Can have an extension, value, etc.
            schema_namespace (str):  The schema namespace of the tag, if any.

        Returns:
            tuple[Union["HedTagEntry", None], Union[str, None], list[dict]]:
            - The located tag entry for this tag.
            - The remainder of the tag that isn't part of the base tag.
            - A list of errors while converting.

        Notes:
            Works left to right (which is mostly relevant for errors).

        """
        # Drop the namespace by length, then casefold for lookups.
        namespace_length = len(schema_namespace)
        stripped_tag = str(tag)[namespace_length:]
        lookup_tag = stripped_tag.casefold()

        # Most tags are in the schema directly, so test that first
        direct_entry = self._get_tag_entry(lookup_tag)
        if direct_entry:
            # The one special case: the actual tag contains "/#" instead of something specific.
            placeholder_remainder = lookup_tag[-2:] if lookup_tag.endswith("/#") else ""
            return direct_entry, placeholder_remainder, []

        try:
            base_entry, slash_index = self._find_tag_subfunction(tag, lookup_tag, namespace_length)
        except self._TagIdentifyError as identify_error:
            return None, None, identify_error.issue

        # The remainder is cut from the tag that still has its original case.
        remainder = stripped_tag[slash_index:] if slash_index != -1 else None
        if remainder and base_entry.takes_value_child_entry:
            base_entry = base_entry.takes_value_child_entry

        return base_entry, remainder, []

    def _find_tag_subfunction(self, tag, working_tag, prefix_tag_adj):
        """ Find the longest leading path of working_tag that is a schema tag.

        Parameters:
            tag (str, HedTag): The original tag, used only when reporting errors.
            working_tag (str): The tag text to search (the caller passes it casefolded and without namespace).
            prefix_tag_adj (int): Offset added to indices in error reports (the caller passes the namespace length).

        Returns:
            tuple: (current_entry, current_slash_index)
            - The entry for the longest leading path found in the schema.
            - The index in working_tag where that path ends (a slash position, or len(working_tag)).

        :raises _TagIdentifyError:
            - Not even the first term is a schema tag.
            - Raised by _validate_remaining_terms for the remainder of a non-takes-value tag.
        """
        current_slash_index = -1
        current_entry = None
        # Loop left to right, checking each word.  Once we find an invalid word, we stop.
        while True:
            # Extend the candidate path to the next slash (or to the end of the tag).
            next_index = working_tag.find("/", current_slash_index + 1)
            if next_index == -1:
                next_index = len(working_tag)
            parent_name = working_tag[:next_index]
            parent_entry = self._get_tag_entry(parent_name)

            if not parent_entry:
                # We haven't found any tag at all yet
                if current_entry is None:
                    error = ErrorHandler.format_error(ValidationErrors.NO_VALID_TAG_FOUND,
                                                      tag,
                                                      index_in_tag=prefix_tag_adj,
                                                      index_in_tag_end=prefix_tag_adj + next_index)
                    raise self._TagIdentifyError(error)
                # If this is not a takes value node, validate each term in the remainder.
                if not current_entry.takes_value_child_entry:
                    # This will raise _TagIdentifyError on any issues
                    self._validate_remaining_terms(tag, working_tag, prefix_tag_adj, current_slash_index)
                break

            # The longer path is a schema tag: remember it and keep extending.
            current_entry = parent_entry
            current_slash_index = next_index
            if next_index == len(working_tag):
                break

        return current_entry, current_slash_index

    def _validate_remaining_terms(self, tag, working_tag, prefix_tag_adj, current_slash_index):
        """ Check that no slash-separated term after current_slash_index is itself a schema tag.

        Parameters:
            tag (str, HedTag): The original tag, used only when reporting errors.
            working_tag (str): The tag text whose trailing terms are checked.
            prefix_tag_adj (int): Offset added to the indices reported in errors.
            current_slash_index (int): Index in working_tag after which the terms to check begin.

        :raises _TagIdentifyError:
            - One of the extension terms already exists as a schema term.
        """
        terms_start = current_slash_index + 1
        term_position = terms_start + prefix_tag_adj  # Start of the current term, for error reporting
        for term in working_tag[terms_start:].split("/"):
            if self._get_tag_entry(term):
                raise self._TagIdentifyError(
                    ErrorHandler.format_error(ValidationErrors.INVALID_PARENT_NODE,
                                              tag,
                                              index_in_tag=term_position,
                                              index_in_tag_end=term_position + len(term),
                                              expected_parent_tag=self.tags[term].name))
            # Step over this term and the slash that follows it.
            term_position += len(term) + 1

    def has_duplicates(self):
        """ Return the first duplicate tag/unit/etc. name found in any section, or False if there are none.

        Sections are checked in the order they are stored; the first entry of the first
        non-empty duplicate_names collection is returned.
        """
        for section in self._sections.values():
            duplicates = section.duplicate_names
            if duplicates:
                return next(iter(duplicates))

        return False

    # ===============================================
    # Semi-private creation finalizing functions
    # ===============================================
    def finalize_dictionaries(self):
        """ Finish loading: clear the cached _schema83 value, then update every section. """
        # Kludge - cleared first so it is recalculated now that all properties are present
        self._schema83 = None

        self._update_all_entries()

    def _update_all_entries(self):
        """ For each section in stored order, set its valid attributes and then finalize it. """
        for section_key in self._sections:
            self._initialize_attributes(section_key)
            self._sections[section_key]._finalize_section(self)

    def _initialize_attributes(self, key_class):
        """ Compute the valid attributes for a section and store them on that section.

        Parameters:
            key_class (HedSectionKey): The section key for the section to update.

        """
        valid_attributes = self._get_attributes_for_section(key_class)
        self._sections[key_class].valid_attributes = valid_attributes

    # ===============================================
    # Getters used to write out schema primarily.
    # ===============================================
    def get_tag_attribute_names_old(self) -> dict[str, HedSchemaEntry]:
        """ Return the attributes allowed on tags, using the old-style property names.

        An attribute entry is included unless it carries one of the unit class, unit,
        unit modifier or value class properties.

        Returns:
            dict[str, HedSchemaEntry]: A dictionary whose keys are attribute names and values are HedSchemaEntry object.

        """
        non_tag_properties = (HedKeyOld.UnitClassProperty,
                              HedKeyOld.UnitProperty,
                              HedKeyOld.UnitModifierProperty,
                              HedKeyOld.ValueClassProperty)

        tag_attributes = {}
        for attribute_entry in self._sections[HedSectionKey.Attributes].values():
            if any(attribute_entry.has_attribute(prop) for prop in non_tag_properties):
                continue
            tag_attributes[attribute_entry.name] = attribute_entry
        return tag_attributes

    # ===============================================
    # Private utility functions
    # ===============================================
    @staticmethod
    def _create_empty_sections():
        """ Build a new dict of empty sections, keyed by HedSectionKey.

        Returns:
            dict: The sections, inserted in the order Properties, Attributes, UnitModifiers,
                  Units, UnitClasses, ValueClasses, Tags.
        """
        return {
            HedSectionKey.Properties: HedSchemaSection(HedSectionKey.Properties),
            HedSectionKey.Attributes: HedSchemaSection(HedSectionKey.Attributes),
            HedSectionKey.UnitModifiers: HedSchemaSection(HedSectionKey.UnitModifiers),
            HedSectionKey.Units: HedSchemaUnitSection(HedSectionKey.Units),
            HedSectionKey.UnitClasses: HedSchemaUnitClassSection(HedSectionKey.UnitClasses),
            HedSectionKey.ValueClasses: HedSchemaSection(HedSectionKey.ValueClasses),
            HedSectionKey.Tags: HedSchemaTagSection(HedSectionKey.Tags, case_sensitive=False),
        }

    def _get_modifiers_for_unit(self, unit):
        """ Return the valid modifiers for the given unit.

        Parameters:
            unit (str): A known unit.

        Returns:
            list: The unit modifier entries that apply to this unit. Empty if the unit is unknown
                  or is not an SI unit. Units with the unit symbol attribute get the SI symbol
                  modifiers; other SI units get the SI modifiers.

        Notes:
            This is a lower level one that doesn't rely on the Unit entries being fully setup.
        """
        unit_entry = self.get_tag_entry(unit, HedSectionKey.Units)
        if unit_entry is None:
            return []

        uses_si = unit_entry.has_attribute(HedKey.SIUnit)
        uses_symbol = unit_entry.has_attribute(HedKey.UnitSymbol)
        if not uses_si:
            return []

        modifier_key = HedKey.SIUnitSymbolModifier if uses_symbol else HedKey.SIUnitModifier
        return self.unit_modifiers.get_entries_with_attribute(modifier_key)

    def _add_element_property_attributes(self, attribute_dict, attribute_name):
        """ Add every attribute entry that has attribute_name to attribute_dict (modified in place).

        Parameters:
            attribute_dict (dict): The dictionary to update.
            attribute_name (str): The attribute an entry of the Attributes section must have to be added.
        """
        attributes_section = self._sections[HedSectionKey.Attributes]
        matching_entries = {name: entry for name, entry in attributes_section.items()
                            if entry.has_attribute(attribute_name)}
        attribute_dict.update(matching_entries)

    def _get_attributes_for_section(self, key_class):
        """ Return the valid attributes for this section.

        Parameters:
            key_class (HedSectionKey): The HedKey for this section.

        Returns:
            dict: A dict of all the attributes for this section (name -> entry).
                  Empty if key_class is not a known section.

        Notes:
            - Attributes and Properties sections: the attribute entries having the element key,
              plus (for Attributes only) every entry of the Properties section.
            - Tags in an old-style schema: the result of get_tag_attribute_names_old().
            - Every other section: the attribute entries having either that section's key
              or the element key.
        """
        element_prop_key = HedKey.ElementDomain if self.schema_83_props else HedKeyOld.ElementProperty

        # Common logic for Attributes and Properties
        if key_class in (HedSectionKey.Attributes, HedSectionKey.Properties):
            prop_added_dict = {}
            if key_class == HedSectionKey.Attributes:
                prop_added_dict = dict(self._sections[HedSectionKey.Properties].items())
            self._add_element_property_attributes(prop_added_dict, element_prop_key)
            return prop_added_dict

        if self.schema_83_props:
            attrib_classes = {
                HedSectionKey.UnitClasses: HedKey.UnitClassDomain,
                HedSectionKey.Units: HedKey.UnitDomain,
                HedSectionKey.UnitModifiers: HedKey.UnitModifierDomain,
                HedSectionKey.ValueClasses: HedKey.ValueClassDomain,
                HedSectionKey.Tags: HedKey.TagDomain
            }
        else:
            # Old-style schemas have no tag key; tag attributes are found by exclusion instead.
            if key_class == HedSectionKey.Tags:
                return self.get_tag_attribute_names_old()
            attrib_classes = {
                HedSectionKey.UnitClasses: HedKeyOld.UnitClassProperty,
                HedSectionKey.Units: HedKeyOld.UnitProperty,
                HedSectionKey.UnitModifiers: HedKeyOld.UnitModifierProperty,
                HedSectionKey.ValueClasses: HedKeyOld.ValueClassProperty
            }

        # Retrieve attributes based on the determined class
        attrib_class = attrib_classes.get(key_class)
        if not attrib_class:
            # Unknown section: return an empty dict so every path returns the same type.
            return {}

        return {attribute: entry for attribute, entry in self._sections[HedSectionKey.Attributes].items()
                if entry.has_attribute(attrib_class) or entry.has_attribute(element_prop_key)}

    # ===============================================
    # Semi private function used to create a schema in memory(usually from a source file)
    # ===============================================
    def _add_tag_to_dict(self, long_tag_name, new_entry, key_class):
        """ Add new_entry under long_tag_name to the section for key_class.

        Returns:
            Whatever the section's _add_to_dict returns.
        """
        return self._sections[key_class]._add_to_dict(long_tag_name, new_entry)

    def _create_tag_entry(self, long_tag_name, key_class):
        """ Ask the section for key_class to create an entry named long_tag_name.

        Returns:
            Whatever the section's _create_tag_entry returns.
        """
        return self._sections[key_class]._create_tag_entry(long_tag_name)

    class _TagIdentifyError(Exception):
        """ Used internally to note when a tag cannot be identified.

        The issue passed to the constructor is kept on the `issue` attribute.
        """
        def __init__(self, issue):
            super().__init__(issue)
            self.issue = issue

attributes property

attributes: 'HedSchemaSection'

Return the attributes schema section.

Returns:

Name Type Description
HedSchemaSection 'HedSchemaSection'

The attributes section.

library property

library: str

The name of this library schema if one exists.

Returns:

Name Type Description
str str

Library name if any.

merged property

merged: bool

Returns if this schema was loaded from a merged file.

Returns:

Name Type Description
bool bool

True if file was loaded from a merged file.

properties property

properties: 'HedSchemaSection'

Return the properties schema section.

Returns:

Name Type Description
HedSchemaSection 'HedSchemaSection'

The properties section.

schema_namespace property

schema_namespace: str

Returns the schema namespace prefix.

Returns:

Name Type Description
str str

The schema namespace prefix.

tags property

tags: 'HedSchemaTagSection'

Return the tag schema section.

Returns:

Name Type Description
HedSchemaTagSection 'HedSchemaTagSection'

The tag section.

unit_classes property

unit_classes: 'HedSchemaUnitClassSection'

Return the unit classes schema section.

Returns:

Name Type Description
HedSchemaUnitClassSection 'HedSchemaUnitClassSection'

The unit classes section.

unit_modifiers property

unit_modifiers: 'HedSchemaSection'

Return the unit modifiers schema section.

Returns:

Name Type Description
HedSchemaSection 'HedSchemaSection'

The unit modifiers section.

units property

units: 'HedSchemaUnitSection'

Return the unit schema section.

Returns:

Name Type Description
HedSchemaUnitSection 'HedSchemaUnitSection'

The unit section.

valid_prefixes property

valid_prefixes: list[str]

Return a list of all prefixes this schema will accept

Returns:

Type Description
list[str]

list[str]: A list of valid tag prefixes for this schema.

Notes
  • The return value is always length 1 if using a HedSchema.

value_classes property

value_classes: 'HedSchemaSection'

Return the value classes schema section.

Returns:

Name Type Description
HedSchemaSection 'HedSchemaSection'

The value classes section.

version property

version: str

The complete schema version, including prefix and library name(if applicable).

Returns:

Name Type Description
str str

The complete schema version including library name and namespace.

version_number property

version_number: str

The HED version of this schema.

Returns:

Name Type Description
str str

The version of this schema.

with_standard property

with_standard: str

The version of the base schema this is extended from, if it exists.

Returns:

Name Type Description
str str

HED version or empty string.

can_save

can_save() -> bool

Returns if it's legal to save this schema.

You cannot save schemas loaded as merged from multiple library schemas.

Returns:

Name Type Description
bool bool

True if this can be saved.

Source code in hed/schema/hed_schema.py
def can_save(self) -> bool:
    """ Report whether this schema may legally be saved.

    Schemas loaded as merged from multiple library schemas cannot be saved; they are
    recognized here by a comma in the library name.

    Returns:
        bool: True if this can be saved.
    """
    library_name = self.library
    if not library_name:
        # No library name at all, so nothing prevents saving.
        return True
    return "," not in library_name

check_compliance

check_compliance(
    check_for_warnings=True, name=None, error_handler=None
) -> list[dict]

Check for HED3 compliance of this schema.

Parameters:

Name Type Description Default
check_for_warnings bool

If True, checks for formatting issues like invalid characters, capitalization.

True
name str

If present, use as the filename for context, rather than using the actual filename. Useful for temp filenames when supporting web services.

None
error_handler ErrorHandler or None

Used to report errors. Uses a default one if none passed in.

None

Returns:

Type Description
list[dict]

list[dict]: A list of all warnings and errors found in the file. Each issue is a dictionary.

Source code in hed/schema/hed_schema.py
def check_compliance(self, check_for_warnings=True, name=None, error_handler=None) -> list[dict]:
    """ Check this schema for HED3 compliance.

    Parameters:
        check_for_warnings (bool): If True, also checks for formatting issues such as invalid characters
                                   and capitalization.
        name (str): If present, used as the filename for context instead of the actual filename.
                    Useful for temp filenames when supporting web services.
        error_handler (ErrorHandler or None): Used to report errors.  A default one is used if none is passed in.

    Returns:
        list[dict]: All warnings and errors found in the file. Each issue is a dictionary.
    """
    # The compliance module is imported at call time rather than when this module is loaded.
    from hed.schema import schema_compliance
    issues = schema_compliance.check_compliance(self, check_for_warnings, name, error_handler)
    return issues

finalize_dictionaries

finalize_dictionaries()

Call to finish loading.

Source code in hed/schema/hed_schema.py
def finalize_dictionaries(self):
    """ Finish loading the schema.

    Clears the cached _schema83 value and then updates all entries.
    """
    # Kludge: reset the cached value here so it is recalculated while all properties are available.
    self._schema83 = None
    self._update_all_entries()

find_tag_entry

find_tag_entry(
    tag, schema_namespace=""
) -> tuple[
    Union["HedTagEntry", None], Union[str, None], list[dict]
]

Find the schema entry for a given source tag.

Parameters:

Name Type Description Default
tag (str, HedTag)

Any form of tag to look up. Can have an extension, value, etc.

required
schema_namespace str

The schema namespace of the tag, if any.

''

Returns:

Type Description
tuple[Union['HedTagEntry', None], Union[str, None], list[dict]]

A tuple of three items:
  • Union['HedTagEntry', None]: The located tag entry for this tag.
  • Union[str, None]: The remainder of the tag that isn't part of the base tag.
  • list[dict]: A list of errors while converting.
Notes

Works left to right (which is mostly relevant for errors).

Source code in hed/schema/hed_schema.py
def find_tag_entry(self, tag, schema_namespace="") -> tuple[Union["HedTagEntry", None], Union[str, None], list[dict]]:
    """ Locate the schema entry for a given source tag.

    Parameters:
        tag (str, HedTag):     Any form of tag to look up.  Can have an extension, value, etc.
        schema_namespace (str):  The schema namespace of the tag, if any.

    Returns:
        tuple[Union["HedTagEntry", None], Union[str, None], list[dict]]:
        - The located tag entry for this tag.
        - The remainder of the tag that isn't part of the base tag.
        - A list of errors while converting.

    Notes:
        Works left to right (which is mostly relevant for errors).

    """
    if schema_namespace == self._namespace:
        return self._find_tag_entry(tag, schema_namespace)

    # The requested namespace is not the one this schema uses: report it and return no entry.
    namespace_issues = ErrorHandler.format_error(ValidationErrors.HED_LIBRARY_UNMATCHED, tag,
                                                 schema_namespace, self.valid_prefixes)
    return None, None, namespace_issues

get_as_dataframes

get_as_dataframes(save_merged=False) -> dict[DataFrame]

Get a dict of dataframes representing this file

Parameters:

Name Type Description Default
save_merged bool

If True, returns DFs as if merged with standard.

False

Returns:

Type Description
dict[DataFrame]

dict[DataFrame]: A dict of dataframes you can load as a schema.

Source code in hed/schema/hed_schema.py
def get_as_dataframes(self, save_merged=False) -> dict[DataFrame]:
    """ Convert this schema into a dict of dataframes.

    Parameters:
        save_merged (bool): If True, returns DFs as if merged with standard.

    Returns:
        dict[DataFrame]: A dict of dataframes you can load as a schema.
    """
    converter = Schema2DF()
    return converter.process_schema(self, save_merged)

get_as_mediawiki_string

get_as_mediawiki_string(save_merged=False) -> str

Return the schema to a mediawiki string.

Parameters:

Name Type Description Default
save_merged bool

If True, this will save the schema as a merged schema if it is a "withStandard" schema. If it is not a "withStandard" schema, this setting has no effect.

False

Returns:

Name Type Description
str str

The schema as a string in mediawiki format.

Source code in hed/schema/hed_schema.py
def get_as_mediawiki_string(self, save_merged=False) -> str:
    """ Render the schema as a single mediawiki string.

    Parameters:
        save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
                            If it is not a "withStandard" schema, this setting has no effect.

    Returns:
        str: The schema as a string in mediawiki format.

    """
    # The converter output is iterated as individual lines, joined here with newlines.
    wiki_lines = Schema2Wiki().process_schema(self, save_merged)
    return '\n'.join(wiki_lines)

get_as_xml_string

get_as_xml_string(save_merged=True) -> str

Return the schema to an XML string.

Parameters:

Name Type Description Default
save_merged bool

If True, this will save the schema as a merged schema if it is a "withStandard" schema. If it is not a "withStandard" schema, this setting has no effect.

True

Returns:

Name Type Description
str str

The schema as an XML string.

Source code in hed/schema/hed_schema.py
def get_as_xml_string(self, save_merged=True) -> str:
    """ Render the schema as an XML string.

    Parameters:
        save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
                            If it is not a "withStandard" schema, this setting has no effect.

    Returns:
        str: The schema as an XML string.

    """
    # Build the XML tree first, then serialize it.
    schema_tree = Schema2XML().process_schema(self, save_merged)
    return schema_util.xml_element_2_str(schema_tree)

get_extras

get_extras(extras_key) -> Union[DataFrame, None]

Get the extras corresponding to the given key

Parameters:

Name Type Description Default
extras_key str

The key to check for in the extras dictionary.

required

Returns:

Type Description
Union[DataFrame, None]

Union[DataFrame, None]: The DataFrame for this extras key, or None if it doesn't exist or is empty.

Source code in hed/schema/hed_schema.py
def get_extras(self, extras_key) -> Union[DataFrame, None]:
    """ Get the extras corresponding to the given key.

    Parameters:
        extras_key (str): The key to check for in the extras dictionary.

    Returns:
        Union[DataFrame, None]: The DataFrame for this extras key, or None if it doesn't exist or is empty.
    """
    # The extras attribute may be missing entirely or set to None; both mean "no extras".
    extras = getattr(self, 'extras', None)
    if extras is None or extras_key not in extras:
        return None
    extras_df = extras[extras_key]
    # An empty DataFrame is reported the same way as a missing one.
    if extras_df.empty:
        return None
    return extras_df

get_formatted_version

get_formatted_version() -> str

The HED version string including namespace and library name if any of this schema.

Returns:

Name Type Description
str str

A json formatted string of the complete version of this schema including library name and namespace.

Source code in hed/schema/hed_schema.py
def get_formatted_version(self) -> str:
    """ Return this schema's complete version as a JSON string.

    Returns:
        str: A json formatted string of the complete version of this schema including library name and namespace.
    """
    complete_version = self.version
    return json.dumps(complete_version)

get_save_header_attributes

get_save_header_attributes(
    save_merged: bool = False,
) -> dict

Returns the attributes that should be saved.

Parameters:

Name Type Description Default
save_merged bool

Whether to save as merged schema.

False

Returns:

Name Type Description
dict dict

The header attributes dictionary.

Source code in hed/schema/hed_schema.py
def get_save_header_attributes(self, save_merged: bool = False) -> dict:
    """ Build the header attributes that should be written when saving.

    The attributes are sorted by name with the version attribute first.  The unmerged
    attribute is removed when saving merged; otherwise it is set to "True" and placed last.

    Parameters:
        save_merged (bool): Whether to save as merged schema.

    Returns:
        dict: The header attributes dictionary.
    """
    def _sort_key(item):
        attribute_name = item[0]
        # The version attribute is given a key of "!" characters so it sorts to the start.
        return "!!!!!!!!!!!!!!" if attribute_name == VERSION_ATTRIBUTE else attribute_name

    ordered_attributes = dict(sorted(self.header_attributes.items(), key=_sort_key))

    # Always drop any existing unmerged marker; re-adding it afterwards guarantees it is the last key.
    ordered_attributes.pop(UNMERGED_ATTRIBUTE, None)
    if not save_merged:
        ordered_attributes[UNMERGED_ATTRIBUTE] = "True"

    return ordered_attributes

get_schema_versions

get_schema_versions() -> list[str]

A list of HED version strings including namespace and library name if any of this schema.

Returns:

Type Description
list[str]

list[str]: The complete version of this schema including library name and namespace.

Source code in hed/schema/hed_schema.py
def get_schema_versions(self) -> list[str]:
    """ Return the formatted version of this schema wrapped in a list.

    Returns:
        list[str]: The complete version of this schema including library name and namespace.
    """
    formatted_version = self.get_formatted_version()
    return [formatted_version]

get_tag_attribute_names_old

get_tag_attribute_names_old() -> dict[str, HedSchemaEntry]

Return a dict of all allowed tag attributes.

Returns:

Type Description
dict[str, HedSchemaEntry]

dict[str, HedSchemaEntry]: A dictionary whose keys are attribute names and values are HedSchemaEntry object.

Source code in hed/schema/hed_schema.py
def get_tag_attribute_names_old(self) -> dict[str, HedSchemaEntry]:
    """ Collect all allowed tag attributes.

    An attribute entry is included unless it carries one of the unit class, unit,
    unit modifier, or value class properties.

    Returns:
        dict[str, HedSchemaEntry]: A dictionary whose keys are attribute names and values are HedSchemaEntry object.

    """
    excluded_properties = (HedKeyOld.UnitClassProperty, HedKeyOld.UnitProperty,
                           HedKeyOld.UnitModifierProperty, HedKeyOld.ValueClassProperty)
    tag_attributes = {}
    for attribute_entry in self._sections[HedSectionKey.Attributes].values():
        # Skip attributes that belong to one of the non-tag sections.
        if any(attribute_entry.has_attribute(prop) for prop in excluded_properties):
            continue
        tag_attributes[attribute_entry.name] = attribute_entry
    return tag_attributes

get_tag_entry

get_tag_entry(
    name: str,
    key_class=HedSectionKey.Tags,
    schema_namespace: str = "",
) -> Union["HedSchemaEntry", None]

Return the schema entry for this tag, if one exists.

Parameters:

Name Type Description Default
name str

Any form of basic tag(or other section entry) to look up. This will not handle extensions or similar. If this is a tag, it can have a schema namespace, but it's not required

required
key_class HedSectionKey or str

The type of entry to return.

Tags
schema_namespace str

Only used on Tags. If incorrect, will return None.

''

Returns:

Type Description
Union['HedSchemaEntry', None]

HedSchemaEntry or None: The schema entry for the given tag, or None if not found.

Source code in hed/schema/hed_schema.py
def get_tag_entry(self, name: str, key_class=HedSectionKey.Tags, schema_namespace: str = "") -> Union["HedSchemaEntry", None]:
    """ Look up the schema entry for this tag, if one exists.

    Parameters:
        name (str): Any form of basic tag(or other section entry) to look up.
            This will not handle extensions or similar.
            If this is a tag, it can have a schema namespace, but it's not required
        key_class (HedSectionKey or str):  The type of entry to return.
        schema_namespace (str): Only used on Tags.  If incorrect, will return None.

    Returns:
        HedSchemaEntry or None: The schema entry for the given tag, or None if not found.
    """
    if key_class == HedSectionKey.Tags:
        own_namespace = self._namespace
        if schema_namespace != own_namespace:
            return None
        # Strip this schema's namespace from the front of the name when it is present.
        name = name.removeprefix(own_namespace)

    return self._get_tag_entry(name, key_class)

get_tags_with_attribute

get_tags_with_attribute(
    attribute, key_class=HedSectionKey.Tags
) -> list["HedSchemaEntry"]

Return tag entries with the given attribute.

Parameters:

Name Type Description Default
attribute str

A tag attribute. Eg HedKey.ExtensionAllowed

required
key_class HedSectionKey

The HedSectionKey for the section to retrieve from.

Tags

Returns:

Type Description
list['HedSchemaEntry']

list[HedSchemaEntry]: A list of all tags with this attribute.

Notes
  • The result is cached so will be fast after first call.
Source code in hed/schema/hed_schema.py
def get_tags_with_attribute(self, attribute, key_class=HedSectionKey.Tags) -> list["HedSchemaEntry"]:
    """ Return the entries in a section that have the given attribute.

    Parameters:
        attribute (str): A tag attribute.  Eg HedKey.ExtensionAllowed
        key_class (HedSectionKey): The HedSectionKey for the section to retrieve from.

    Returns:
        list[HedSchemaEntry]: A list of all tags with this attribute.

    Notes:
        - The result is cached so will be fast after first call.
        - NOTE(review): the section is queried with return_name_only=True, so the result may be
          names rather than entry objects - confirm against get_entries_with_attribute.
    """
    section = self._sections[key_class]
    return section.get_entries_with_attribute(attribute, return_name_only=True,
                                              schema_namespace=self._namespace)

has_duplicates

has_duplicates()

Returns the first duplicate tag/unit/etc. if any section has a duplicate name

Source code in hed/schema/hed_schema.py
def has_duplicates(self):
    """Return the first duplicate tag/unit/etc. name found in any section, or False if there is none."""
    for section in self._sections.values():
        duplicates = section.duplicate_names
        if duplicates:
            # Only the first name of the first section that has duplicates is reported.
            return next(iter(duplicates))

    return False

save_as_dataframes

save_as_dataframes(base_filename, save_merged=False)

Save as dataframes to a folder of files.

If base_filename has a .tsv suffix, save directly to the indicated location.
If base_filename is a directory(does NOT have a .tsv suffix), save the contents into a directory named that.
The subfiles are named the same.  e.g. HED8.3.0/HED8.3.0_Tag.tsv

Parameters:

Name Type Description Default
base_filename str

Save filename. A suffix will be added to most, e.g. _Tag

required
save_merged bool

If True, this will save the schema as a merged schema if it is a "withStandard" schema. If it is not a "withStandard" schema, this setting has no effect.

False

Raises:

Type Description
OSError

File cannot be saved for some reason.

Source code in hed/schema/hed_schema.py
def save_as_dataframes(self, base_filename, save_merged=False):
    """ Save the schema as dataframes to a folder of files.

        If base_filename has a .tsv suffix, save directly to the indicated location.
        If base_filename is a directory(does NOT have a .tsv suffix), save the contents into a directory named that.
        The subfiles are named the same.  e.g. HED8.3.0/HED8.3.0_Tag.tsv

    Parameters:
        base_filename (str): Save filename. A suffix will be added to most, e.g. _Tag
        save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
                            If it is not a "withStandard" schema, this setting has no effect.


    Raises:
        OSError: File cannot be saved for some reason.
    """
    dataframes = Schema2DF().process_schema(self, save_merged)
    # Any extras present on the schema are saved alongside the converted dataframes.
    extras = getattr(self, 'extras', None)
    if extras:
        dataframes.update(extras)
    df_util.save_dataframes(base_filename, dataframes)

save_as_mediawiki

save_as_mediawiki(filename, save_merged=False)

Save as mediawiki to a file.

Parameters:

Name Type Description Default
filename str

Save location.

required
save_merged bool

If True, this will save the schema as a merged schema if it is a "withStandard" schema. If it is not a "withStandard" schema, this setting has no effect.

False

Raises:

Type Description
OSError

File cannot be saved for some reason.

Source code in hed/schema/hed_schema.py
def save_as_mediawiki(self, filename, save_merged=False):
    """ Write the schema to a file in mediawiki format.

    Parameters:
        filename (str): Save location.
        save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
                            If it is not a "withStandard" schema, this setting has no effect.


    Raises:
        OSError: File cannot be saved for some reason.
    """
    wiki_lines = Schema2Wiki().process_schema(self, save_merged)
    with open(filename, mode='w', encoding='utf-8') as wiki_file:
        # Every line, including the last one, is terminated with a newline.
        wiki_file.writelines(line + '\n' for line in wiki_lines)

save_as_xml

save_as_xml(filename, save_merged=True)

Save as XML to a file.

Parameters:

Name Type Description Default
filename str

Save location.

required
save_merged bool

If true, this will save the schema as a merged schema if it is a "withStandard" schema. If it is not a "withStandard" schema, this setting has no effect.

True

Raises:

Type Description
OSError

File cannot be saved for some reason.

Source code in hed/schema/hed_schema.py
def save_as_xml(self, filename, save_merged=True):
    """ Write the schema to a file in XML format.

    Parameters:
        filename (str): Save location.
        save_merged (bool): If true, this will save the schema as a merged schema if it is a "withStandard" schema.
                            If it is not a "withStandard" schema, this setting has no effect.


    Raises:
        OSError: File cannot be saved for some reason.
    """
    schema_tree = Schema2XML().process_schema(self, save_merged)
    with open(filename, mode='w', encoding='utf-8') as xml_file:
        # The tree is serialized after the output file has been opened.
        xml_file.write(schema_util.xml_element_2_str(schema_tree))

schema_for_namespace

schema_for_namespace(
    namespace: str,
) -> Union["HedSchema", None]

Return HedSchema object for this namespace.

Parameters:

Name Type Description Default
namespace str

The schema library name namespace.

required

Returns:

Type Description
Union['HedSchema', None]

HedSchema or None: The HED schema object for this schema, or None if namespace doesn't match.

Source code in hed/schema/hed_schema.py
def schema_for_namespace(self, namespace: str) -> Union["HedSchema", None]:
    """ Return this schema if it handles the given namespace.

    Parameters:
        namespace (str): The schema library name namespace.

    Returns:
        HedSchema or None: The HED schema object for this schema, or None if namespace doesn't match.
    """
    return self if self._namespace == namespace else None

set_schema_prefix

set_schema_prefix(schema_namespace)

Set library namespace associated for this schema.

Parameters:

Name Type Description Default
schema_namespace str

Should be empty, or end with a colon. (A colon will be automatically added if missing.)

required

:raises HedFileError: - The prefix is invalid

Source code in hed/schema/hed_schema.py
def set_schema_prefix(self, schema_namespace):
    """ Set the library namespace associated with this schema.

    Parameters:
        schema_namespace (str): Should be empty, or end with a colon.
                                (A colon will be automatically added if missing.)

    :raises HedFileError:
        - The prefix is invalid
    """
    if schema_namespace:
        if not schema_namespace.endswith(":"):
            schema_namespace += ":"

        # Everything before the trailing colon must be purely alphabetic.
        prefix_body = schema_namespace[:-1]
        if not prefix_body.isalpha():
            raise HedFileError(HedExceptions.INVALID_LIBRARY_PREFIX,
                               "Schema namespace must contain only alpha characters",
                               self.filename)

    self._namespace = schema_namespace

Schema I/O

hed_schema_io

Utilities for loading and outputting HED schema.

from_dataframes

from_dataframes(
    schema_data, schema_namespace=None, name=None
) -> "HedSchema"

Create a schema from the given dict of dataframes (or TSV file contents as strings).

Parameters:

Name Type Description Default
schema_data dict

A dict of DF_SUFFIXES:file_as_string_or_df Should have an entry for all values of DF_SUFFIXES.

required
schema_namespace (str, None)

The name_prefix all tags in this schema will accept.

None
name str or None

User supplied identifier for this schema

None

Returns:

Name Type Description
HedSchema 'HedSchema'

The loaded schema.

:raises HedFileError: - Empty/invalid parameters

Notes
  • The loading is determined by file type.
Source code in hed/schema/hed_schema_io.py
def from_dataframes(schema_data, schema_namespace=None, name=None) -> 'HedSchema':
    """ Create a schema from a dict of dataframes (or file contents as strings).

    Parameters:
        schema_data (dict): A dict of DF_SUFFIXES:file_as_string_or_df.
                              Should have an entry for all values of DF_SUFFIXES.
        schema_namespace (str, None):  The name_prefix all tags in this schema will accept.
        name (str or None): User supplied identifier for this schema

    Returns:
        HedSchema:  The loaded schema.

    :raises HedFileError:
        - Empty/invalid parameters

    """
    if not (schema_data and isinstance(schema_data, dict)):
        raise HedFileError(HedExceptions.BAD_PARAMETERS, "Empty or non dict value passed to HedSchema.from_dataframes",
                           filename=name)

    loaded_schema = SchemaLoaderDF.load_spreadsheet(schema_as_strings_or_df=schema_data, name=name)

    # A namespace is only applied when one was actually supplied.
    if schema_namespace:
        loaded_schema.set_schema_prefix(schema_namespace=schema_namespace)

    return loaded_schema

from_string

from_string(
    schema_string,
    schema_format=".xml",
    schema_namespace=None,
    schema=None,
    name=None,
)

Create a schema from the given string.

Parameters:

Name Type Description Default
schema_string str

An XML or mediawiki file as a single long string

required
schema_format str

The schema format of the source schema string. Allowed normal values: .mediawiki, .xml

'.xml'
schema_namespace (str, None)

The name_prefix all tags in this schema will accept.

None
schema HedSchema or None

A HED schema to merge this new file into. It must be a with-standard schema with the same value.

None
name str or None

User supplied identifier for this schema

None

Returns:

Type Description
HedSchema

The loaded schema.

:raises HedFileError: - If empty string or invalid extension is passed. - Other fatal formatting issues with file

Notes
  • The loading is determined by file type.
Source code in hed/schema/hed_schema_io.py
def from_string(schema_string, schema_format=".xml", schema_namespace=None, schema=None, name=None):
    """ Create a schema from the contents of a schema file held in a string.

    Parameters:
        schema_string (str): An XML or mediawiki file as a single long string
        schema_format (str):         The schema format of the source schema string.
            Allowed normal values: .mediawiki, .xml
        schema_namespace (str, None):  The name_prefix all tags in this schema will accept.
        schema (HedSchema or None): A HED schema to merge this new file into.
                                   It must be a with-standard schema with the same value.
        name (str or None): User supplied identifier for this schema

    Returns:
        (HedSchema):  The loaded schema.

    :raises HedFileError:
        - If empty string or invalid extension is passed.
        - Other fatal formatting issues with file

    Notes:
        - The loader is chosen from the ending of schema_format.

    """
    if not schema_string:
        raise HedFileError(HedExceptions.BAD_PARAMETERS, "Empty string passed to HedSchema.from_string",
                           filename=name)

    if isinstance(schema_string, str):
        # Normalize Windows line endings, since the caller might not have done so.
        schema_string = schema_string.replace("\r\n", "\n")

    # Pick the loader that matches the declared format.
    if schema_format.endswith(".xml"):
        loader = SchemaLoaderXML
    elif schema_format.endswith(".mediawiki"):
        loader = SchemaLoaderWiki
    else:
        raise HedFileError(HedExceptions.INVALID_EXTENSION, f"Unknown schema extension {schema_format}", filename=name)

    loaded_schema = loader.load(schema_as_string=schema_string, schema=schema, name=name)

    if schema_namespace:
        loaded_schema.set_schema_prefix(schema_namespace=schema_namespace)
    return loaded_schema

get_hed_xml_version

get_hed_xml_version(xml_file_path) -> str

Get the version number from a HED XML file.

Parameters:

Name Type Description Default
xml_file_path str

The path to a HED XML file.

required

Returns:

Name Type Description
str str

The version number of the HED XML file.

:raises HedFileError: - There is an issue loading the schema

Source code in hed/schema/hed_schema_io.py
def get_hed_xml_version(xml_file_path) -> str:
    """ Read the version of the schema stored in a HED XML file.

    Parameters:
        xml_file_path (str): The path to a HED XML file.

    Returns:
        str: The version number of the HED XML file.

    :raises HedFileError:
        - There is an issue loading the schema
    """
    # The version is read from the schema held by a loader constructed for the file.
    return SchemaLoaderXML(xml_file_path).schema.version

load_schema

load_schema(
    hed_path, schema_namespace=None, schema=None, name=None
) -> "HedSchema"

Load a schema from the given file or URL path.

Parameters:

Name Type Description Default
hed_path str

A filepath or url to open a schema from. If loading a TSV file, this should be a single filename where: Template: basename.tsv, where files are named basename_Struct.tsv, basename_Tag.tsv, etc. Alternatively, you can point to a directory containing the .tsv files.

required
schema_namespace str or None

The name_prefix all tags in this schema will accept.

None
schema HedSchema or None

A HED schema to merge this new file into. It must be a with-standard schema with the same value.

None
name str or None

User supplied identifier for this schema

None

Returns:

Name Type Description
HedSchema 'HedSchema'

The loaded schema.

:raises HedFileError: - Empty path passed - Unknown extension - Any fatal issues when loading the schema.

Source code in hed/schema/hed_schema_io.py
def load_schema(hed_path, schema_namespace=None, schema=None, name=None) -> 'HedSchema':
    """ Load a schema from the given file or URL path.

    Parameters:
        hed_path (str): A filepath or url to open a schema from.
            If loading a TSV file, this should be a single filename where:
            Template: basename.tsv, where files are named basename_Struct.tsv, basename_Tag.tsv, etc.
            Alternatively, you can point to a directory containing the .tsv files.
        schema_namespace (str or None): The name_prefix all tags in this schema will accept.
        schema (HedSchema or None): A HED schema to merge this new file into.
                                   It must be a with-standard schema with the same value.
                                   Not supported when loading TSV files or directories.
        name (str or None): User supplied identifier for this schema

    Returns:
        HedSchema: The loaded schema.

    :raises HedFileError:
        - Empty path passed
        - Unknown extension
        - The URL cannot be read
        - A schema to merge into is passed when loading TSV files
        - Any fatal issues when loading the schema.

    """
    if not hed_path:
        raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file path passed to HedSchema.load_file",
                           filename=hed_path)

    lower_path = hed_path.lower()
    is_url = hed_cache._check_if_url(hed_path)
    if is_url:
        try:
            file_as_string = schema_util.url_to_string(hed_path)
        except URLError as e:
            raise HedFileError(HedExceptions.URL_ERROR, str(e), hed_path) from e
        # Forward the merge target as well, so a schema loaded from a URL is merged
        # just like one loaded from a local file instead of silently ignoring `schema`.
        hed_schema = from_string(file_as_string, schema_format=os.path.splitext(lower_path)[1],
                                 schema=schema, name=name)
    elif lower_path.endswith(".xml"):
        hed_schema = SchemaLoaderXML.load(hed_path, schema=schema, name=name)
    elif lower_path.endswith(".mediawiki"):
        hed_schema = SchemaLoaderWiki.load(hed_path, schema=schema, name=name)
    elif lower_path.endswith(".tsv") or os.path.isdir(hed_path):
        # Merging into an existing schema is not available for the spreadsheet loader.
        if schema is not None:
            raise HedFileError(HedExceptions.INVALID_HED_FORMAT,
                               "Cannot pass a schema to merge into spreadsheet loading currently.", filename=name)
        hed_schema = SchemaLoaderDF.load_spreadsheet(filenames=hed_path, name=name)
    else:
        raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=hed_path)

    if schema_namespace:
        hed_schema.set_schema_prefix(schema_namespace=schema_namespace)

    return hed_schema

load_schema_version

load_schema_version(
    xml_version=None, xml_folder=None
) -> Union["HedSchema", "HedSchemaGroup"]

Return a HedSchema or HedSchemaGroup extracted from xml_version

Parameters:

Name Type Description Default
xml_version str or list

List or str specifying which official HED schemas to use. A json str format is also supported, based on the output of HedSchema.get_formatted_version Basic format: [schema_namespace:][library_name_]X.Y.Z.

None
xml_folder str

Path to a folder containing schema.

None

Returns:

Type Description
Union['HedSchema', 'HedSchemaGroup']

HedSchema or HedSchemaGroup: The schema or schema group extracted.

:raises HedFileError: - The xml_version is not valid. - The specified version cannot be found or loaded - Other fatal errors loading the schema (These are unlikely if you are not editing them locally) - The prefix is invalid

Source code in hed/schema/hed_schema_io.py
def load_schema_version(xml_version=None, xml_folder=None) -> Union['HedSchema', 'HedSchemaGroup']:
    """ Load the schema (or group of schemas) described by xml_version.

    Parameters:
        xml_version (str or list): Str or list of str naming the HED schema version(s) to load.
                                   A str that starts and ends with square brackets or double quotes
                                   is decoded as JSON first (the format produced by get_formatted_version).
                                   Basic format: `[schema_namespace:][library_name_]X.Y.Z`.
        xml_folder (str): Path to a folder containing schema.

    Returns:
        HedSchema or HedSchemaGroup: A single schema when only one results, otherwise a group.

    :raises HedFileError:
        - The xml_version is not valid.
        - The specified version cannot be found or loaded
        - Other fatal errors loading the schema (These are unlikely if you are not editing them locally)
        - The prefix is invalid
    """
    # Only strings wrapped in [...] or "..." are candidates for JSON decoding.
    if xml_version and isinstance(xml_version, str):
        wrapper = (xml_version[0], xml_version[-1])
        if wrapper in [('[', ']'), ('"', '"')]:
            try:
                xml_version = json.loads(xml_version)
            except json.decoder.JSONDecodeError as e:
                raise HedFileError(HedExceptions.CANNOT_PARSE_JSON, str(e), xml_version) from e

    # Anything that is not a non-empty list is handed straight to the single-version loader.
    if not (xml_version and isinstance(xml_version, list)):
        return _load_schema_version(xml_version=xml_version, xml_folder=xml_folder)

    versions_by_namespace = parse_version_list(xml_version)
    loaded = []
    for version in versions_by_namespace.values():
        loaded.append(_load_schema_version(xml_version=version, xml_folder=xml_folder))

    if len(loaded) == 1:
        return loaded[0]

    # The group is named after the comma-joined versions of its members.
    group_name = ",".join([schema.version for schema in loaded])
    return HedSchemaGroup(loaded, name=group_name)

parse_version_list

parse_version_list(xml_version_list) -> dict

Takes a list of xml versions and returns a dictionary split by prefix

e.g. ["score", "testlib"] will return {"": "score,testlib"}
e.g. ["score", "testlib", "ol:otherlib"] will return {"": "score,testlib", "ol": "ol:otherlib"}

Parameters:

Name Type Description Default
xml_version_list list

List of str specifying which HED schemas to use

required

Returns:

Name Type Description
dict dict

A dictionary of version strings split by prefix.

Source code in hed/schema/hed_schema_io.py
def parse_version_list(xml_version_list) -> dict:
    """Takes a list of xml versions and returns a dictionary split by namespace prefix

    Versions that share a namespace are joined with commas (no spaces).  For a non-empty
    namespace, the namespace and a colon are re-attached to the front of the joined string.

        e.g. ["score", "testlib"] will return {"": "score,testlib"}
        e.g. ["score", "testlib", "ol:otherlib"] will return {"": "score,testlib", "ol": "ol:otherlib"}

    Parameters:
        xml_version_list (list): List of str specifying which HED schemas to use

    Returns:
        dict: Maps each namespace (without the colon, "" when there is none) to its combined version string.

    :raises HedFileError:
        - An entry of the list is not a string.
        - The same version is listed twice under one namespace.
    """
    out_versions = defaultdict(list)
    for version in xml_version_list:
        # Validate the type before any string operation, so a non-string entry (e.g. an int)
        # is reported as a HedFileError instead of leaking a TypeError from the ":" test.
        if not isinstance(version, str):
            raise HedFileError(HedExceptions.SCHEMA_VERSION_INVALID,
                               f"Must specify schema version by number, found no version on {xml_version_list} schema.",
                               filename=None)

        schema_namespace = ""
        if ":" in version:
            schema_namespace, _, version = version.partition(":")

        if version in out_versions[schema_namespace]:
            raise HedFileError(HedExceptions.SCHEMA_DUPLICATE_LIBRARY,
                               f"Attempting to load same library '{version}' twice: {out_versions[schema_namespace]}",
                               filename=None)
        out_versions[schema_namespace].append(version)

    # Un-namespaced versions are joined as-is; namespaced ones get "<namespace>:" prepended once.
    return {key: ",".join(value) if not key else f"{key}:" + ",".join(value)
            for key, value in out_versions.items()}

HedSchemaEntry

HedSchemaEntry

A single node in a HedSchema.

The structure contains all the node information including attributes and properties.

Source code in hed/schema/hed_schema_entry.py
class HedSchemaEntry:
    """ A single node in a HedSchema.

        The structure contains all the node information including attributes and properties.

    """
    def __init__(self, name, section):
        """ Constructor for HedSchemaEntry.

        Parameters:
            name (str): The name of the entry.
            section (HedSchemaSection):  The section to which it belongs.

        """
        self.name = name
        # key: property/attribute name, value = property value.  Will often be a bool
        self.attributes = {}
        self.description = None
        self._section = section

        # This section is largely unused.  It will only be filled in when we try to add an attribute
        # that isn't valid in this section.
        self._unknown_attributes = None

    def finalize_entry(self, schema):
        """ Called once after loading to set internal state.

        Drops from the unknown-attribute record every attribute that the section now lists as valid.

        Parameters:
            schema (HedSchema): The schema that holds the rules (not used by this base implementation).

        """
        if not self._unknown_attributes:
            return

        # Collect first, then remove: the dict must not change size while it is being iterated.
        now_valid = [attribute for attribute in self._unknown_attributes
                     if attribute in self._section.valid_attributes]
        for attribute in now_valid:
            self._unknown_attributes.pop(attribute)

    def has_attribute(self, attribute, return_value=False) -> Union[bool, Any]:
        """ Checks for the existence of an attribute in this entry.

        Parameters:
            attribute (str): The attribute to check for.
            return_value (bool): If True, returns the actual value of the attribute.
                                 If False, returns a boolean indicating the presence of the attribute.

        Returns:
            bool or any: If return_value is False, returns True if the attribute exists and False otherwise.
            If return_value is True, returns the value of the attribute if it exists, else returns None.

        Notes:
            - The existence of an attribute does not guarantee its validity.
        """
        if return_value:
            return self.attributes.get(attribute, None)
        return attribute in self.attributes

    def attribute_has_property(self, attribute, property_name) -> bool:
        """ Return True if attribute has property.

        The attribute is looked up among the valid attributes of this entry's section,
        not among the attributes set on this entry.

        Parameters:
            attribute (str): Attribute name to check for property_name.
            property_name (str): The name of the property to check for.

        Returns:
            bool: True if the attribute is valid for the section and carries the property.

        """
        attr_entry = self._section.valid_attributes.get(attribute)
        if attr_entry and attr_entry.has_attribute(property_name):
            return True
        return False

    def _set_attribute_value(self, attribute, attribute_value):
        """ Add attribute and set its value.

        Parameters:
            attribute (str): The name of the schema entry attribute.
            attribute_value (bool or str):  The value of the attribute.  Falsy values are ignored.

        Notes:
            - If this an invalid attribute name, it will be also added as an unknown attribute.

        """
        if not attribute_value:
            return

        # todo: remove this patch and redo the code
        # This check doesn't need to be done if the schema is valid.
        if attribute not in self._section.valid_attributes:
            if self._unknown_attributes is None:
                self._unknown_attributes = {}
            self._unknown_attributes[attribute] = attribute_value
        self.attributes[attribute] = attribute_value

    @property
    def section_key(self):
        """ The section key of the section this entry belongs to. """
        return self._section.section_key

    def __eq__(self, other):
        """ Entries are equal when name, attributes (ignoring list order) and description all match.

        Any object that is not a HedSchemaEntry compares unequal instead of raising AttributeError.
        False is returned (rather than NotImplemented) so the result stays a plain bool for
        callers that use it directly in a boolean context.
        """
        if not isinstance(other, HedSchemaEntry):
            return False
        if self.name != other.name:
            return False
        if not self._compare_attributes_no_order(self.attributes, other.attributes):
            return False
        if self.description != other.description:
            return False
        return True

    def __hash__(self):
        # Hash on the name only; equal entries always share a name, so this stays consistent with __eq__.
        return hash(self.name)

    def __str__(self):
        return self.name

    @staticmethod
    def _compare_attributes_no_order(left, right):
        """ Compare two attribute dicts, treating comma separated string values as unordered sets.

        Parameters:
            left (dict): Attribute name to value.
            right (dict): Attribute name to value.

        Returns:
            bool: True if the dicts match once string values are split on "," and compared as sets.
        """
        if left != right:
            # Only pay for the set conversion when the cheap direct comparison fails.
            left = {name: (set(value.split(",")) if isinstance(value, str) else value)
                    for (name, value) in left.items()}
            right = {name: (set(value.split(",")) if isinstance(value, str) else value)
                     for (name, value) in right.items()}

        return left == right

attribute_has_property

attribute_has_property(attribute, property_name) -> bool

Return True if attribute has property.

Parameters:

Name Type Description Default
attribute str

Attribute name to check for property_name.

required
property_name str

The name of the property to check for.

required

Returns:

Name Type Description
bool bool

Returns True if this entry has the property.

Source code in hed/schema/hed_schema_entry.py
def attribute_has_property(self, attribute, property_name) -> bool:
    """ Report whether a section-valid attribute carries the given property.

    Parameters:
        attribute (str): Attribute name, looked up among the section's valid attributes.
        property_name (str): The name of the property to check for.

    Returns:
        bool: True only if the attribute is found and has the property.

    """
    entry = self._section.valid_attributes.get(attribute)
    # A missing (or falsy) attribute entry can never carry the property.
    if not entry:
        return False
    return True if entry.has_attribute(property_name) else False

finalize_entry

finalize_entry(schema)

Called once after loading to set internal state.

Parameters:

Name Type Description Default
schema HedSchema

The schema that holds the rules.

required
Source code in hed/schema/hed_schema_entry.py
def finalize_entry(self, schema):
    """ Post-load cleanup of the unknown-attribute record.

    Removes every recorded unknown attribute that the section lists as valid.

    Parameters:
        schema (HedSchema): The schema that holds the rules (not read here).

    """
    unknown = self._unknown_attributes
    if not unknown:
        return

    valid = self._section.valid_attributes
    # Snapshot the names to drop before mutating the dict.
    for name in [attribute for attribute in unknown if attribute in valid]:
        unknown.pop(name)

has_attribute

has_attribute(
    attribute, return_value=False
) -> Union[bool, Any]

Checks for the existence of an attribute in this entry.

Parameters:

Name Type Description Default
attribute str

The attribute to check for.

required
return_value bool

If True, returns the actual value of the attribute. If False, returns a boolean indicating the presence of the attribute.

False

Returns:

Type Description
Union[bool, Any]

bool or any: If return_value is False, returns True if the attribute exists and False otherwise.

Union[bool, Any]

If return_value is True, returns the value of the attribute if it exists, else returns None.

Notes
  • The existence of an attribute does not guarantee its validity.
Source code in hed/schema/hed_schema_entry.py
def has_attribute(self, attribute, return_value=False) -> Union[bool, Any]:
    """ Look up an attribute on this entry.

    Parameters:
        attribute (str): The attribute to check for.
        return_value (bool): If True, return the stored value instead of a presence flag.

    Returns:
        bool or any: With return_value False, True if the attribute is present and False otherwise.
        With return_value True, the stored value, or None if the attribute is absent.

    Notes:
        - The existence of an attribute does not guarantee its validity.
    """
    if not return_value:
        return attribute in self.attributes
    return self.attributes.get(attribute, None)

HedSchemaGroup

HedSchemaGroup

Bases: HedSchemaBase

Container for multiple HedSchema objects.

Notes
  • The container class is useful when library schemas are included.
  • You cannot save/load/etc. the combined schema object directly.
Source code in hed/schema/hed_schema_group.py
class HedSchemaGroup(HedSchemaBase):
    """ Container for multiple HedSchema objects.

    Notes:
        - The container class is useful when library schemas are included.
        - You cannot save/load/etc. the combined schema object directly.

    """
    def __init__(self, schema_list, name=""):
        """ Combine multiple HedSchema objects from a list.

        Parameters:
            schema_list (list): A list of HedSchema for the container.
            name (str): Identifier stored for this group.

        :raises HedFileError:
            - Multiple schemas have the same library prefixes.
            - Empty list passed
        """
        super().__init__()
        # NOTE(review): self.name is read in the error paths below before self._name is assigned
        # at the end of this constructor; presumably the base class supplies a usable default - confirm.
        if len(schema_list) == 0:
            raise HedFileError(HedExceptions.BAD_PARAMETERS, "Empty list passed to HedSchemaGroup constructor.",
                               filename=self.name)
        schema_prefixes = [hed_schema._namespace for hed_schema in schema_list]
        if len(set(schema_prefixes)) != len(schema_prefixes):
            raise HedFileError(HedExceptions.SCHEMA_DUPLICATE_PREFIX,
                               "Multiple schema share the same tag name_prefix.  This is not allowed.",
                               filename=self.name)
        # Member schemas keyed by their namespace, in the order they were passed in.
        self._schemas = {hed_schema._namespace: hed_schema for hed_schema in schema_list}
        source_formats = [hed_schema.source_format for hed_schema in schema_list]
        # All must be same source format or return None.
        self.source_format = source_formats[0] if len(set(source_formats)) == 1 else None
        self._name = name

    def get_schema_versions(self) -> list[str]:
        """ A list of HED version strings including namespace and library name if any for these schemas.

        Returns:
            list[str]: The version attribute of each member schema, one entry per schema.
        """
        return [schema.version for schema in self._schemas.values()]

    def get_formatted_version(self) -> str:
        """ The versions of all schemas in this group, serialized as a JSON list.

        Returns:
            str: JSON encoding of the list returned by get_schema_versions.
        """
        return json.dumps(self.get_schema_versions())

    def __eq__(self, other):
        """ Groups are equal when their namespace-to-schema mappings are equal.

        Anything that is not a HedSchemaGroup (e.g. a single schema or None) compares unequal
        instead of raising AttributeError on the missing _schemas attribute.
        """
        if not isinstance(other, HedSchemaGroup):
            return False
        return self._schemas == other._schemas

    def schema_for_namespace(self, namespace) -> Union[HedSchema,None]:
        """ Return the HedSchema for the library namespace.

        Parameters:
            namespace (str): A schema library name namespace.

        Returns:
            Union[HedSchema,None]: The specific schema for this library name namespace if exists.

        """
        schema = self._schemas.get(namespace)
        return schema

    @property
    def valid_prefixes(self) -> list[str]:
        """ Return a list of all prefixes this group will accept.

        Returns:
            list[str]:  The namespaces of the member schemas.

        """
        return list(self._schemas.keys())

    def check_compliance(self, check_for_warnings=True, name=None, error_handler=None) -> list[dict]:
        """ Check for HED3 compliance of this schema.

        Parameters:
            check_for_warnings (bool): If True, checks for formatting issues like invalid characters, capitalization.
            name (str): If present, use as the filename for context, rather than using the actual filename.
                        Useful for temp filenames when supporting web services.
            error_handler (ErrorHandler or None): Used to report errors.  Uses a default one if none passed in.

        Returns:
            list[dict]: The issues reported by every member schema, concatenated in member order.
        """
        issues_list = []
        for schema in self._schemas.values():
            issues_list += schema.check_compliance(check_for_warnings, name, error_handler)
        return issues_list

    def get_tags_with_attribute(self, attribute, key_class=HedSectionKey.Tags) -> list:
        """ Return tag entries with the given attribute.

        Parameters:
            attribute (str): A tag attribute.  Eg HedKey.ExtensionAllowed
            key_class (HedSectionKey): The HedSectionKey for the section to retrieve from.

        Returns:
            list: The matching entries from all member schemas, without duplicates.

        Notes:
            - Results are merged through a set, so the order of the returned list is unspecified.
        """
        tags = set()
        for schema in self._schemas.values():
            tags.update(schema.get_tags_with_attribute(attribute, key_class))
        return list(tags)

    def get_tag_entry(self, name, key_class=HedSectionKey.Tags, schema_namespace="") -> Union["HedSchemaEntry", None]:
        """ Return the schema entry for this tag, if one exists.

        Parameters:
            name (str): Any form of basic tag(or other section entry) to look up.
                This will not handle extensions or similar.
                If this is a tag, it can have a schema namespace, but it's not required
            key_class (HedSectionKey or str):  The type of entry to return.
            schema_namespace (str): Selects the member schema to search.  If no member has it, returns None.

        Returns:
            HedSchemaEntry: The schema entry for the given tag.
        """
        specific_schema = self.schema_for_namespace(schema_namespace)
        if not specific_schema:
            return None

        return specific_schema.get_tag_entry(name, key_class, schema_namespace)

    def find_tag_entry(self, tag, schema_namespace="") -> tuple[Union["HedTagEntry", None], Union[str, None], list]:
        """ Find the schema entry for a given source tag.

        Parameters:
            tag (str, HedTag): Any form of tag to look up.  Can have an extension, value, etc.
            schema_namespace (str): The schema namespace of the tag, if any.

        Returns:
            HedTagEntry: The located tag entry for this tag.
            str: The remainder of the tag that isn't part of the base tag.
            list: A list of errors while converting.

        Notes:
            Works left to right (which is mostly relevant for errors).
            If no member schema has the namespace, returns (None, None, issues) with a
            HED_LIBRARY_UNMATCHED error.
        """
        specific_schema = self.schema_for_namespace(schema_namespace)
        if not specific_schema:
            validation_issues = ErrorHandler.format_error(ValidationErrors.HED_LIBRARY_UNMATCHED, tag,
                                                          schema_namespace, self.valid_prefixes)
            return None, None, validation_issues

        return specific_schema._find_tag_entry(tag, schema_namespace)

valid_prefixes property

valid_prefixes: list[str]

Return a list of all prefixes this group will accept.

Returns:

Type Description
list[str]

list[str]: A list of strings representing valid prefixes for this group.

check_compliance

check_compliance(
    check_for_warnings=True, name=None, error_handler=None
) -> list[dict]

Check for HED3 compliance of this schema.

Parameters:

Name Type Description Default
check_for_warnings bool

If True, checks for formatting issues like invalid characters, capitalization.

True
name str

If present, use as the filename for context, rather than using the actual filename. Useful for temp filenames when supporting web services.

None
error_handler ErrorHandler or None

Used to report errors. Uses a default one if none passed in.

None

Returns:

Type Description
list[dict]

list[dict]: A list of all warnings and errors found in the file. Each issue is a dictionary.

Source code in hed/schema/hed_schema_group.py
def check_compliance(self, check_for_warnings=True, name=None, error_handler=None) -> list[dict]:
    """ Run the compliance check of every member schema and merge the results.

    Parameters:
        check_for_warnings (bool): If True, checks for formatting issues like invalid characters, capitalization.
        name (str): If present, use as the filename for context, rather than using the actual filename.
                    Useful for temp filenames when supporting web services.
        error_handler (ErrorHandler or None): Used to report errors.  Uses a default one if none passed in.

    Returns:
        list[dict]: The issues from all member schemas, concatenated in member order.
    """
    collected = []
    for member in self._schemas.values():
        collected.extend(member.check_compliance(check_for_warnings, name, error_handler))
    return collected

find_tag_entry

find_tag_entry(
    tag, schema_namespace=""
) -> tuple[
    Union["HedTagEntry", None], Union[str, None], list
]

Find the schema entry for a given source tag.

Parameters:

Name Type Description Default
tag (str, HedTag)

Any form of tag to look up. Can have an extension, value, etc.

required
schema_namespace str

The schema namespace of the tag, if any.

''

Returns:

Name Type Description
HedTagEntry Union['HedTagEntry', None]

The located tag entry for this tag.

str Union[str, None]

The remainder of the tag that isn't part of the base tag.

list list

A list of errors while converting.

Notes

Works left to right (which is mostly relevant for errors).

Source code in hed/schema/hed_schema_group.py
def find_tag_entry(self, tag, schema_namespace="") -> tuple[Union["HedTagEntry", None], Union[str, None], list]:
    """ Locate the schema entry for a source tag in the member schema matching its namespace.

    Parameters:
        tag (str, HedTag): Any form of tag to look up.  Can have an extension, value, etc.
        schema_namespace (str): The schema namespace of the tag, if any.

    Returns:
        HedTagEntry: The located tag entry for this tag.
        str: The remainder of the tag that isn't part of the base tag.
        list: A list of errors while converting.

    Notes:
        Works left to right (which is mostly relevant for errors).
        When no member schema matches the namespace, the first two values are None.
    """
    target = self.schema_for_namespace(schema_namespace)
    if target:
        return target._find_tag_entry(tag, schema_namespace)

    # No member schema owns this namespace: report it together with the accepted prefixes.
    issues = ErrorHandler.format_error(ValidationErrors.HED_LIBRARY_UNMATCHED, tag,
                                       schema_namespace, self.valid_prefixes)
    return None, None, issues

get_formatted_version

get_formatted_version() -> str

The HED version strings of all schemas in this group (including namespace and library name, if any), serialized as a JSON list.

Returns:

Name Type Description
str str

The complete version of this schema including library name and namespace.

Source code in hed/schema/hed_schema_group.py
def get_formatted_version(self) -> str:
    """ Serialize the versions of this group's schemas as JSON.

    Returns:
        str: JSON encoding of whatever get_schema_versions returns.
    """
    versions = self.get_schema_versions()
    return json.dumps(versions)

get_schema_versions

get_schema_versions() -> list[str]

A list of HED version strings including namespace and library name if any for these schemas.

Returns:

Type Description
list[str]

list[str]: The complete version of this schema including library name and namespace.

Source code in hed/schema/hed_schema_group.py
def get_schema_versions(self) -> list[str]:
    """ Collect the version string of every schema in this group.

    Returns:
        list[str]: The version attribute of each member schema, in member order.
    """
    versions = []
    for member in self._schemas.values():
        versions.append(member.version)
    return versions

get_tag_entry

get_tag_entry(
    name, key_class=HedSectionKey.Tags, schema_namespace=""
) -> Union["HedSchemaEntry", None]

Return the schema entry for this tag, if one exists.

Parameters:

Name Type Description Default
name str

Any form of basic tag(or other section entry) to look up. This will not handle extensions or similar. If this is a tag, it can have a schema namespace, but it's not required

required
key_class HedSectionKey or str

The type of entry to return.

Tags
schema_namespace str

Only used on Tags. If incorrect, will return None.

''

Returns:

Name Type Description
HedSchemaEntry Union['HedSchemaEntry', None]

The schema entry for the given tag.

Source code in hed/schema/hed_schema_group.py
def get_tag_entry(self, name, key_class=HedSectionKey.Tags, schema_namespace="") -> Union["HedSchemaEntry", None]:
    """ Look up a schema entry in the member schema selected by schema_namespace.

    Parameters:
        name (str): Any form of basic tag(or other section entry) to look up.
            This will not handle extensions or similar.
            If this is a tag, it can have a schema namespace, but it's not required
        key_class (HedSectionKey or str):  The type of entry to return.
        schema_namespace (str): Selects the member schema.  If no member has it, returns None.

    Returns:
        HedSchemaEntry: The schema entry for the given tag.
    """
    target = self.schema_for_namespace(schema_namespace)
    return target.get_tag_entry(name, key_class, schema_namespace) if target else None

get_tags_with_attribute

get_tags_with_attribute(
    attribute, key_class=HedSectionKey.Tags
) -> list

Return tag entries with the given attribute.

Parameters:

Name Type Description Default
attribute str

A tag attribute. Eg HedKey.ExtensionAllowed

required
key_class HedSectionKey

The HedSectionKey for the section to retrieve from.

Tags

Returns:

Name Type Description
list list

A list of all tags with this attribute.

Notes
  • The result is cached so will be fast after first call.
Source code in hed/schema/hed_schema_group.py
def get_tags_with_attribute(self, attribute, key_class=HedSectionKey.Tags) -> list:
    """ Gather the entries carrying an attribute from every member schema.

    Parameters:
        attribute (str): A tag attribute.  Eg HedKey.ExtensionAllowed
        key_class (HedSectionKey): The HedSectionKey for the section to retrieve from.

    Returns:
        list: The matching entries from all member schemas, without duplicates.

    Notes:
        - Results are merged through a set, so the order of the returned list is unspecified.
    """
    found = set()
    for member in self._schemas.values():
        matches = member.get_tags_with_attribute(attribute, key_class)
        found.update(matches)
    return list(found)

schema_for_namespace

schema_for_namespace(namespace) -> Union[HedSchema, None]

Return the HedSchema for the library namespace.

Parameters:

Name Type Description Default
namespace str

A schema library name namespace.

required

Returns:

Type Description
Union[HedSchema, None]

Union[HedSchema,None]: The specific schema for this library name namespace if exists.

Source code in hed/schema/hed_schema_group.py
def schema_for_namespace(self, namespace) -> Union[HedSchema,None]:
    """ Fetch the member schema registered under a namespace.

    Parameters:
        namespace (str): A schema library name namespace.

    Returns:
        Union[HedSchema,None]: The schema stored for this namespace, or None if there is none.

    """
    return self._schemas.get(namespace)

Schema Comparison

schema_comparer

Functions supporting comparison of schemas.

SchemaComparer

Class for comparing HED schemas and generating change logs.

Source code in hed/schema/schema_comparer.py
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
class SchemaComparer:
    """Class for comparing HED schemas and generating change logs.

    The comparer holds two schemas (``schema1`` is treated as the "first"/old schema and
    ``schema2`` as the "second"/new schema in the generated messages) and offers:

    - ``compare_schemas``: raw per-section comparison dictionaries.
    - ``find_matching_tags``: entries whose names appear in both schemas.
    - ``gather_schema_changes`` / ``compare_differences``: a change log with a
      severity ('Major', 'Minor', 'Patch' or 'Unknown') attached to each change.
    """

    # Class-level constants
    MISC_SECTION = "misc"
    HED_ID_SECTION = "HedId changes"
    EXTRAS_SECTION = "Extras changes"
    SOURCES = "Sources"
    PREFIXES = "Prefixes"
    ANNOTATION_PROPERTY_EXTERNAL = "AnnotationPropertyExternal"

    # Singular display names; the key order here also fixes the section order
    # of the dictionary returned by gather_schema_changes.
    SECTION_ENTRY_NAMES = {
        HedSectionKey.Tags: "Tag",
        HedSectionKey.Units: "Unit",
        HedSectionKey.UnitClasses: "Unit Class",
        HedSectionKey.ValueClasses: "Value Class",
        HedSectionKey.UnitModifiers: "Unit Modifier",
        HedSectionKey.Properties: "Property",
        HedSectionKey.Attributes: "Attribute",
        MISC_SECTION: "Misc Metadata",
        HED_ID_SECTION: "Modified Hed Ids",
        SOURCES: "Sources",
        PREFIXES: "Prefixes",
        ANNOTATION_PROPERTY_EXTERNAL: "AnnotationPropertyExternal",
    }

    # Plural display names used for section headings and summaries.
    SECTION_ENTRY_NAMES_PLURAL = {
        HedSectionKey.Tags: "Tags",
        HedSectionKey.Units: "Units",
        HedSectionKey.UnitClasses: "Unit Classes",
        HedSectionKey.ValueClasses: "Value Classes",
        HedSectionKey.UnitModifiers: "Unit Modifiers",
        HedSectionKey.Properties: "Properties",
        HedSectionKey.Attributes: "Attributes",
        MISC_SECTION: "Misc Metadata",
        HED_ID_SECTION: "Modified Hed Ids",
        EXTRAS_SECTION: "Extras",
    }

    # TODO: Check that the cases of these are correct.
    DF_EXTRAS = {SOURCES, PREFIXES, ANNOTATION_PROPERTY_EXTERNAL}

    def __init__(self, schema1, schema2):
        """Initialize the SchemaComparer with two schemas.

        Parameters:
            schema1: The first schema of the comparison.
            schema2: The second schema of the comparison.
        """
        self.schema1 = schema1
        self.schema2 = schema2

    def find_matching_tags(self, sections=(HedSectionKey.Tags,), return_string=True):
        """Compare the tags in the two schemas.

        Parameters:
            sections (iterable or None): Section keys to compare (passed to compare_schemas).
            return_string (bool): If True, return a formatted summary string;
                otherwise return the combined dictionary.

        Returns:
            Union[str, dict]: A summary of the names present in both schemas, or a dictionary
                mapping each section key to {name: (entry1, entry2)} for equal and unequal
                entries alike.
        """
        matches, _, _, unequal_entries = self.compare_schemas(sections=sections)
        header_summary = self._get_tag_name_summary((matches, unequal_entries))

        # Combine the two dictionaries
        for section_key, section_dict in matches.items():
            section_dict.update(unequal_entries[section_key])

        if return_string:
            final_string = "Nodes with matching names:\n"
            final_string += self._pretty_print_header(header_summary)
            return final_string
        return matches

    def compare_schemas(self, attribute_filter=HedKey.InLibrary, sections=(HedSectionKey.Tags,)):
        """Compare the two schemas section by section.

        Parameters:
            attribute_filter (str or None): If truthy, only entries having this attribute are compared.
            sections (iterable or None): Section keys to compare. None compares every section
                and the misc metadata (header attributes, prologue, epilogue).

        Returns:
            tuple: (matches, not_in_schema1, not_in_schema2, unequal_entries), each a dictionary
                keyed by section key. ``matches`` and ``unequal_entries`` map names to
                (entry1, entry2) pairs; the ``not_in_*`` dictionaries map names to single entries.
                ``unequal_entries`` may also hold a MISC_SECTION key mapping to (value1, value2) pairs.
        """
        matches, not_in_schema2, not_in_schema1, unequal_entries = {}, {}, {}, {}

        # Handle miscellaneous sections
        if sections is None or self.MISC_SECTION in sections:
            unequal_entries[self.MISC_SECTION] = {}
            if self.schema1.get_save_header_attributes() != self.schema2.get_save_header_attributes():
                unequal_entries[self.MISC_SECTION]['header_attributes'] = \
                    (str(self.schema1.get_save_header_attributes()), str(self.schema2.get_save_header_attributes()))
            if self.schema1.prologue != self.schema2.prologue:
                unequal_entries[self.MISC_SECTION]['prologue'] = (self.schema1.prologue, self.schema2.prologue)
            if self.schema1.epilogue != self.schema2.epilogue:
                unequal_entries[self.MISC_SECTION]['epilogue'] = (self.schema1.epilogue, self.schema2.epilogue)

        # Compare sections
        for section_key in HedSectionKey:
            if sections is not None and section_key not in sections:
                continue
            dict1, dict2 = {}, {}
            section1, section2 = self.schema1[section_key], self.schema2[section_key]
            # Tags are keyed by their short name; every other section by its plain name.
            name_attribute = 'short_tag_name' if section_key == HedSectionKey.Tags else 'name'

            for entry in section1.all_entries:
                if not attribute_filter or entry.has_attribute(attribute_filter):
                    dict1[getattr(entry, name_attribute)] = entry

            for entry in section2.all_entries:
                if not attribute_filter or entry.has_attribute(attribute_filter):
                    dict2[getattr(entry, name_attribute)] = entry

            not_in_schema2[section_key] = {key: dict1[key] for key in dict1 if key not in dict2}
            not_in_schema1[section_key] = {key: dict2[key] for key in dict2 if key not in dict1}
            unequal_entries[section_key] = {key: (dict1[key], dict2[key]) for key in dict1
                                            if key in dict2 and dict1[key] != dict2[key]}
            matches[section_key] = {key: (dict1[key], dict2[key]) for key in dict1
                                    if key in dict2 and dict1[key] == dict2[key]}

        return matches, not_in_schema1, not_in_schema2, unequal_entries

    def gather_schema_changes(self, attribute_filter=None):
        """Generate a changelog by comparing the two schemas.

        Parameters:
            attribute_filter (str or None): If truthy, only entries having this attribute are compared.

        Returns:
            dict: Maps section keys to lists of change dictionaries (with at least the keys
                'change_type', 'change' and 'tag'), sorted by severity. Only keys listed in
                SECTION_ENTRY_NAMES are returned, in that order.
        """
        _, not_in_1, not_in_2, unequal_entries = self.compare_schemas(attribute_filter=attribute_filter, sections=None)
        change_dict = defaultdict(list)
        self._add_removed_items(change_dict, not_in_2)
        self._add_added_items(change_dict, not_in_1)
        self._add_unequal_entries(change_dict, unequal_entries)
        self._add_extras_changes(change_dict)
        self._sort_changes_by_severity(change_dict)
        return {key: change_dict[key] for key in self.SECTION_ENTRY_NAMES if key in change_dict}

    def pretty_print_change_dict(self, change_dict, title="Schema changes", use_markdown=True):
        """Format the change dictionary into a string.

        Parameters:
            change_dict (dict): Maps section keys to lists of change dictionaries.
            title (str): First line of the output.
            use_markdown (bool): If True, use bold section names and " - " bullets;
                otherwise use plain section names and tab-indented lines.

        Returns:
            str: The formatted change log, or an empty string if change_dict is empty.
        """
        final_strings = []
        line_prefix = " - " if use_markdown else "\t"
        if change_dict:
            final_strings.append(title)
            final_strings.append("")  # add blank line
            for section_key, section_dict in change_dict.items():
                # Fall back to the raw key for sections without a plural display name.
                name = self.SECTION_ENTRY_NAMES_PLURAL.get(section_key, section_key)
                line_endings = "**" if use_markdown else ""
                final_strings.append(f"{line_endings}{name}:{line_endings}")
                for item in section_dict:
                    change, tag, change_type = item['change'], item['tag'], item['change_type']
                    final_strings.append(f"{line_prefix}{tag} ({change_type}): {change}")
                final_strings.append("")
        return "\n".join(final_strings)

    def compare_differences(self, attribute_filter=None, title=""):
        """Compare the tags and extras in the two schemas, reporting all differences.

        Parameters:
            attribute_filter (str or None): If truthy, only entries having this attribute are compared.
            title (str): Title of the report. If empty, one is built from the two schema names.

        Returns:
            str: The formatted change log.
        """
        changelog = self.gather_schema_changes(attribute_filter=attribute_filter)
        if not title:
            title = f"Differences between {self.schema1.name} and {self.schema2.name}"
        return self.pretty_print_change_dict(changelog, title=title)

    # Private helper methods
    def _pretty_print_header(self, summary_dict):
        """Format a summary dictionary of tag names by section into a string.

        Parameters:
            summary_dict (dict): Maps section keys to lists of names.

        Returns:
            str: One "<plural section name>: name1, name2" paragraph per non-empty section.
        """
        output_string = ""
        first_entry = True
        for section_key, tag_names in summary_dict.items():
            if not tag_names:
                continue
            type_name = self.SECTION_ENTRY_NAMES_PLURAL[section_key]
            if not first_entry:
                output_string += "\n"
            output_string += f"{type_name}: "
            output_string += ", ".join(sorted(tag_names))
            output_string += "\n"
            first_entry = False
        return output_string

    @staticmethod
    def _get_tag_name_summary(tag_dicts):
        """Combine dictionaries into a summary of tag names by section.

        Parameters:
            tag_dicts (iterable of dict): Dictionaries mapping section keys to {name: value} dictionaries.

        Returns:
            dict: Maps every HedSectionKey to the list of names found for it.

        Notes:
            Keys that are not HedSectionKey members (for example the misc section that
            compare_schemas adds to its unequal entries) are skipped rather than raising
            a KeyError, since they do not hold named schema entries.
        """
        out_dict = {section_key: [] for section_key in HedSectionKey}
        for tag_dict in tag_dicts:
            for section_key, section in tag_dict.items():
                if section_key not in out_dict:
                    continue
                out_dict[section_key].extend(section.keys())
        return out_dict

    def _add_removed_items(self, change_dict, not_in_2):
        """Add removed items to the change dictionary.

        Removed tags are recorded as 'Major'; removals in other sections as 'Unknown'.

        Parameters:
            change_dict (dict): Maps section keys to lists of change dictionaries (updated in place).
            not_in_2 (dict): Maps section keys to {name: entry} for entries missing from the second schema.
        """
        for section_key, section in not_in_2.items():
            for tag, _ in section.items():
                type_name = self.SECTION_ENTRY_NAMES_PLURAL[section_key]
                change_type = 'Major' if section_key == HedSectionKey.Tags else 'Unknown'
                change_dict[section_key].append(
                    {'change_type': change_type, 'change': f'Tag {tag} deleted from {type_name}', 'tag': tag}
                )

    @staticmethod
    def _add_added_items(change_dict, not_in_1):
        """Add added items to the change dictionary (always recorded as 'Minor').

        Parameters:
            change_dict (dict): Maps section keys to lists of change dictionaries (updated in place).
            not_in_1 (dict): Maps section keys to {name: entry} for entries missing from the first schema.
        """
        for section_key, section in not_in_1.items():
            for tag, _ in section.items():
                change_dict[section_key].append(
                    {'change_type': 'Minor', 'change': f'Item {tag} added', 'tag': tag}
                )

    def _add_unequal_entries(self, change_dict, unequal_entries):
        """Add unequal entries to the change dictionary.

        Parameters:
            change_dict (dict): Maps section keys to lists of change dictionaries (updated in place).
            unequal_entries (dict): Maps section keys to {name: (entry1, entry2)}; the misc
                section maps to {name: (value1, value2)} instead.
        """
        for section_key, changes in unequal_entries.items():
            if section_key == self.MISC_SECTION:
                self._add_misc_section_changes(change_dict, section_key, changes)
            elif section_key in self.DF_EXTRAS:
                self._add_extras_section_changes(change_dict, section_key, changes)
            else:
                for tag, (entry1, entry2) in changes.items():
                    if section_key == HedSectionKey.UnitClasses:
                        self._add_unit_classes_changes(change_dict, section_key, entry1, entry2)
                    elif section_key == HedSectionKey.Tags:
                        self._add_tag_changes(change_dict, section_key, entry1, entry2)
                    self._check_other_attributes(change_dict, section_key, entry1, entry2)
                    if entry1.description != entry2.description:
                        change_dict[section_key].append(
                            {'change_type': 'Patch', 'change': f'Description of {tag} modified', 'tag': tag})

    @staticmethod
    def _add_misc_section_changes(change_dict, section_key, changes):
        """Add changes for the misc section to the change dictionary.

        All misc changes are recorded as 'Patch'. Prologue and epilogue changes are
        reported without their (potentially long) old and new values.

        Parameters:
            change_dict (dict): Maps section keys to lists of change dictionaries (updated in place).
            section_key (str): The key under which the changes are stored.
            changes (dict): Maps misc item names to (value1, value2) pairs.
        """
        for misc_section, (value1, value2) in changes.items():
            is_text_block = "prologue" in misc_section or "epilogue" in misc_section
            if is_text_block:
                change_desc = f'{misc_section} changed'
            else:
                change_desc = f'{misc_section} changed from {value1} to {value2}'
            change_dict[section_key].append({'change_type': 'Patch', 'change': change_desc, 'tag': misc_section})

    def _add_extras_section_changes(self, change_dict, section_key, changes):
        """Add changes for extras sections (dataframes) to the change dictionary."""
        pass  # Placeholder for extras section changes logic.

    @staticmethod
    def _add_unit_classes_changes(change_dict, section_key, entry1, entry2):
        """Add changes for unit classes to the change dictionary.

        A unit removed from a unit class is 'Major'; a unit added is 'Patch'.

        Parameters:
            change_dict (dict): Maps section keys to lists of change dictionaries (updated in place).
            section_key: The key under which the changes are stored.
            entry1: The unit class entry from the first schema.
            entry2: The unit class entry from the second schema.
        """
        for unit in entry1.units:
            if unit not in entry2.units:
                change_dict[section_key].append(
                    {'change_type': 'Major', 'change': f'Unit {unit} removed from {entry1.name}', 'tag': entry1.name}
                )
        for unit in entry2.units:
            if unit not in entry1.units:
                change_dict[section_key].append(
                    {'change_type': 'Patch', 'change': f'Unit {unit} added to {entry2.name}', 'tag': entry1.name}
                )

    def _add_tag_changes(self, change_dict, section_key, entry1, entry2):
        """Add changes for tags to the change dictionary.

        Reports unit class and value class additions/removals, a change of the long
        tag name (the tag moved in the schema), and suggested/related tag changes.

        Parameters:
            change_dict (dict): Maps section keys to lists of change dictionaries (updated in place).
            section_key: The key under which the changes are stored.
            entry1: The tag entry from the first schema.
            entry2: The tag entry from the second schema.
        """
        for unit_class in entry1.unit_classes:
            if unit_class not in entry2.unit_classes:
                change_dict[section_key].append(
                    {'change_type': 'Major', 'change': f'Unit class {unit_class} removed from {entry1.short_tag_name}',
                     'tag': entry1.short_tag_name}
                )
        for unit_class in entry2.unit_classes:
            if unit_class not in entry1.unit_classes:
                change_dict[section_key].append(
                    {'change_type': 'Patch', 'change': f'Unit class {unit_class} added to {entry2.short_tag_name}',
                     'tag': entry1.short_tag_name}
                )
        for value_class in entry1.value_classes:
            if value_class not in entry2.value_classes:
                change_dict[section_key].append(
                    {'change_type': 'Unknown', 'change': f'Value class {value_class} removed from {entry1.short_tag_name}',
                     'tag': entry1.short_tag_name}
                )
        for value_class in entry2.value_classes:
            if value_class not in entry1.value_classes:
                change_dict[section_key].append(
                    {'change_type': 'Minor', 'change': f'Value class {value_class} added to {entry2.short_tag_name}',
                     'tag': entry1.short_tag_name}
                )
        if entry1.long_tag_name != entry2.long_tag_name:
            change_dict[section_key].append(
                {'change_type': 'Minor',
                 'change': f'Tag {entry1.short_tag_name} moved in schema from {entry1.long_tag_name} to {entry2.long_tag_name}',
                 'tag': entry1.short_tag_name}
            )
        self._add_suggested_tag_changes(change_dict, entry1, entry2, HedKey.SuggestedTag, "Suggested tag")
        self._add_suggested_tag_changes(change_dict, entry1, entry2, HedKey.RelatedTag, "Related tag")

    @staticmethod
    def _add_suggested_tag_changes(change_dict, entry1, entry2, attribute, label):
        """Add changes for suggested or related tags to the change dictionary.

        The comma-separated attribute values are sorted before comparison so that a
        pure reordering is not reported as a change.

        Parameters:
            change_dict (dict): Maps section keys to lists of change dictionaries (updated in place).
            entry1: The tag entry from the first schema.
            entry2: The tag entry from the second schema.
            attribute (str): The inherited attribute to compare.
            label (str): Human-readable name of the attribute used in the message.
        """
        related_tag1 = ", ".join(sorted(entry1.inherited_attributes.get(attribute, "").split(",")))
        related_tag2 = ", ".join(sorted(entry2.inherited_attributes.get(attribute, "").split(",")))
        if related_tag1 != related_tag2:
            if not related_tag1:
                related_tag1 = "empty"
            if not related_tag2:
                related_tag2 = "empty"
            change_dict[HedSectionKey.Tags].append(
                {'change_type': 'Patch',
                 'change': f'{label} changed on {entry1.short_tag_name} from {related_tag1} to {related_tag2}',
                 'tag': entry1.short_tag_name})

    def _check_other_attributes(self, change_dict, section_key, entry1, entry2):
        """Compare non-specialized attributes and add differences to the change dictionary.

        Related/suggested tags and value/unit classes are skipped because they are
        handled elsewhere. HedID changes are filed under HED_ID_SECTION instead of
        the entry's own section.

        Parameters:
            change_dict (dict): Maps section keys to lists of change dictionaries (updated in place).
            section_key: The section the entries belong to.
            entry1: The entry from the first schema.
            entry2: The entry from the second schema.
        """
        already_checked_attributes = [HedKey.RelatedTag, HedKey.SuggestedTag, HedKey.ValueClass, HedKey.UnitClass]
        unique_keys = set(entry1.attributes.keys()).union(entry2.attributes.keys())
        if section_key == HedSectionKey.Tags:
            unique_inherited_keys = set(entry1.inherited_attributes.keys()).union(entry2.inherited_attributes.keys())
        else:
            unique_inherited_keys = unique_keys
        all_unique_keys = unique_keys.union(unique_inherited_keys).difference(already_checked_attributes)

        for key in all_unique_keys:
            is_inherited = key in unique_inherited_keys
            is_direct = key in unique_keys

            if section_key == HedSectionKey.Tags:
                value1 = entry1.inherited_attributes.get(key)
                value2 = entry2.inherited_attributes.get(key)
            else:
                value1 = entry1.attributes.get(key)
                value2 = entry2.attributes.get(key)

            if value1 != value2:
                change_type = "Patch"
                start_text = f"Attribute {key} "
                # An attribute seen only through inheritance is reported as 'Minor'.
                if is_inherited and not is_direct:
                    change_type = "Minor"
                    start_text = f"Inherited attribute {key} "

                if value1 is True and value2 is None:
                    end_text = "removed"
                elif value1 is None and value2 is True:
                    end_text = "added"
                else:
                    end_text = f"modified from {value1} to {value2}"

                use_section_key = section_key
                if key == HedKey.HedID:
                    use_section_key = self.HED_ID_SECTION
                change_dict[use_section_key].append({
                    "change_type": change_type,
                    "change": f"{start_text}{end_text}",
                    "tag": entry1.name if section_key != HedSectionKey.Tags else entry1.short_tag_name,
                    "section": section_key
                })

    def _add_extras_changes(self, change_dict):
        """Compare the extras (dataframes) in two schemas and add differences to the change dictionary.

        Column names are compared case-insensitively (both frames are lower-cased on a
        copy) and only the columns common to both frames are compared.

        Parameters:
            change_dict (dict): Maps section keys to lists of change dictionaries (updated in place).
        """
        from hed.schema.schema_io.df_constants import UNIQUE_EXTRAS_KEYS

        extras1 = getattr(self.schema1, "extras", {}) or {}
        extras2 = getattr(self.schema2, "extras", {}) or {}

        all_keys = set(extras1.keys()).union(extras2.keys())
        for key in all_keys:
            df1 = extras1.get(key)
            df2 = extras2.get(key)
            if df1 is None and df2 is not None:
                change_dict[key].append({'change_type': 'Minor', 'change': f'Entire {key} section missing in first schema', 'tag': key})
                continue
            if df2 is None and df1 is not None:
                change_dict[key].append({'change_type': 'Minor', 'change': f'Entire {key} section missing in second schema', 'tag': key})
                continue
            if df1 is None and df2 is None:
                continue

            # Work on copies so the schemas' own dataframes are not renamed.
            df1 = df1.copy()
            df2 = df2.copy()
            df1.columns = [c.lower() for c in df1.columns]
            df2.columns = [c.lower() for c in df2.columns]

            key_cols = UNIQUE_EXTRAS_KEYS.get(key)
            if not key_cols:
                # No declared key: a row is identified by all of its shared columns.
                key_cols = list(set(df1.columns) & set(df2.columns))

            compare_cols = list(set(df1.columns) & set(df2.columns))
            if not compare_cols:
                continue

            df1 = df1[compare_cols]
            df2 = df2[compare_cols]

            diff_results = self._compare_dataframes(df1, df2, key_cols)
            for diff in diff_results:
                row_key = diff['row']
                cols = diff['cols']
                msg = diff['message']
                if msg == 'Row missing in first schema':
                    change_dict[key].append({'change_type': 'Minor', 'change': f'Row {row_key} missing in first schema', 'tag': str(row_key)})
                elif msg == 'Row missing in second schema':
                    change_dict[key].append({'change_type': 'Minor', 'change': f'Row {row_key} missing in second schema', 'tag': str(row_key)})
                elif msg == 'Duplicate keys found':
                    change_dict[key].append({'change_type': 'Unknown', 'change': f'Duplicate key {row_key} found in one or both schemas', 'tag': str(row_key)})
                elif msg == 'Column values differ':
                    col_str = ', '.join(cols) if cols else ''
                    change_dict[key].append({'change_type': 'Patch', 'change': f'Row {row_key} columns differ: {col_str}', 'tag': str(row_key)})

    @staticmethod
    def _values_differ(value1, value2):
        """Return True if two dataframe cell values differ.

        Two missing values (None/NaN/NaT) are treated as equal; a plain ``!=`` would
        report NaN as different from NaN.
        """
        both_scalar = pd.api.types.is_scalar(value1) and pd.api.types.is_scalar(value2)
        if both_scalar and pd.isna(value1) and pd.isna(value2):
            return False
        return value1 != value2

    @staticmethod
    def _compare_dataframes(df1, df2, key_cols):
        """Compare two dataframes by key columns and report row/column differences.

        Parameters:
            df1 (pd.DataFrame): Dataframe from the first schema.
            df2 (pd.DataFrame): Dataframe from the second schema.
            key_cols (list): Columns that identify a row.

        Returns:
            list: One dictionary per difference with the keys 'row' (the row key),
                'cols' (list of differing columns or None) and 'message'.
        """
        results = []

        df1_indexed = df1.set_index(key_cols)
        df2_indexed = df2.set_index(key_cols)

        all_keys = set(df1_indexed.index).union(df2_indexed.index)

        for key in all_keys:
            if key not in df1_indexed.index:
                results.append({'row': key, 'cols': None, 'message': 'Row missing in first schema'})
            elif key not in df2_indexed.index:
                results.append({'row': key, 'cols': None, 'message': 'Row missing in second schema'})
            else:
                row1 = df1_indexed.loc[key]
                row2 = df2_indexed.loc[key]

                # .loc returns a DataFrame (not a Series) when the key matches several rows.
                if isinstance(row1, pd.DataFrame) or isinstance(row2, pd.DataFrame):
                    results.append({'row': key, 'cols': None, 'message': 'Duplicate keys found'})
                    continue

                unequal_cols = [col for col in df1.columns
                                if col not in key_cols and SchemaComparer._values_differ(row1[col], row2[col])]
                if unequal_cols:
                    results.append({'row': key, 'cols': unequal_cols, 'message': 'Column values differ'})

        return results

    @staticmethod
    def _sort_changes_by_severity(changes_dict):
        """Sort the changelist by severity.

        Each list is sorted in place in the order Major, Minor, Patch, Unknown;
        unrecognized change types sort with Unknown.

        Parameters:
            changes_dict (dict): Dictionary mapping section keys to lists of change dicts.
        """
        order = {'Major': 1, 'Minor': 2, 'Patch': 3, 'Unknown': 4}
        for section in changes_dict.values():
            section.sort(key=lambda x: order.get(x['change_type'], order['Unknown']))

compare_differences

compare_differences(attribute_filter=None, title='')

Compare the tags and extras in the two schemas, reporting all differences.

Source code in hed/schema/schema_comparer.py
def compare_differences(self, attribute_filter=None, title=""):
    """Report every difference between the two schemas as a formatted string.

    Parameters:
        attribute_filter (str or None): Passed through to gather_schema_changes.
        title (str): Heading of the report; when empty a heading naming both schemas is used.

    Returns:
        str: The change log as formatted by pretty_print_change_dict.
    """
    change_log = self.gather_schema_changes(attribute_filter=attribute_filter)
    # The default heading is only built when the caller supplied none.
    heading = title or f"Differences between {self.schema1.name} and {self.schema2.name}"
    return self.pretty_print_change_dict(change_log, title=heading)

compare_schemas

compare_schemas(
    attribute_filter=HedKey.InLibrary,
    sections=(HedSectionKey.Tags,),
)

Compare the two schemas section by section.

Source code in hed/schema/schema_comparer.py
def compare_schemas(self, attribute_filter=HedKey.InLibrary, sections=(HedSectionKey.Tags,)):
    """Compare the two schemas one section at a time.

    Parameters:
        attribute_filter (str or None): If truthy, only entries having this attribute are compared.
        sections (iterable or None): Section keys to compare; None compares every section
            together with the misc metadata (header attributes, prologue, epilogue).

    Returns:
        tuple: (matches, not_in_schema1, not_in_schema2, unequal_entries), each keyed by section key.
    """
    matches = {}
    not_in_schema1 = {}
    not_in_schema2 = {}
    unequal_entries = {}
    compare_everything = sections is None

    # Misc metadata: header attributes, prologue and epilogue.
    if compare_everything or self.MISC_SECTION in sections:
        misc_changes = {}
        unequal_entries[self.MISC_SECTION] = misc_changes
        if self.schema1.get_save_header_attributes() != self.schema2.get_save_header_attributes():
            misc_changes['header_attributes'] = (
                str(self.schema1.get_save_header_attributes()),
                str(self.schema2.get_save_header_attributes()),
            )
        if self.schema1.prologue != self.schema2.prologue:
            misc_changes['prologue'] = (self.schema1.prologue, self.schema2.prologue)
        if self.schema1.epilogue != self.schema2.epilogue:
            misc_changes['epilogue'] = (self.schema1.epilogue, self.schema2.epilogue)

    # Regular schema sections.
    for section_key in HedSectionKey:
        if not compare_everything and section_key not in sections:
            continue
        first_section, second_section = self.schema1[section_key], self.schema2[section_key]
        # Tags are keyed by short name, all other sections by plain name.
        name_attribute = 'name' if section_key != HedSectionKey.Tags else 'short_tag_name'

        entries1 = {}
        for entry in first_section.all_entries:
            if not attribute_filter or entry.has_attribute(attribute_filter):
                entries1[getattr(entry, name_attribute)] = entry
        entries2 = {}
        for entry in second_section.all_entries:
            if not attribute_filter or entry.has_attribute(attribute_filter):
                entries2[getattr(entry, name_attribute)] = entry

        only_in_first, differing, identical = {}, {}, {}
        for name, entry1 in entries1.items():
            if name not in entries2:
                only_in_first[name] = entry1
                continue
            entry2 = entries2[name]
            # Inequality and equality are tested independently.
            if entry1 != entry2:
                differing[name] = (entry1, entry2)
            if entry1 == entry2:
                identical[name] = (entry1, entry2)

        not_in_schema2[section_key] = only_in_first
        not_in_schema1[section_key] = {name: entry2 for name, entry2 in entries2.items()
                                       if name not in entries1}
        unequal_entries[section_key] = differing
        matches[section_key] = identical

    return matches, not_in_schema1, not_in_schema2, unequal_entries

find_matching_tags

find_matching_tags(
    sections=(HedSectionKey.Tags,), return_string=True
)

Compare the tags in the two schemas.

Source code in hed/schema/schema_comparer.py
def find_matching_tags(self, sections=(HedSectionKey.Tags,), return_string=True):
    """Find the entries whose names occur in both schemas.

    Parameters:
        sections (iterable or None): Section keys to compare (passed to compare_schemas).
        return_string (bool): If True return a formatted summary, otherwise the combined dictionary.

    Returns:
        Union[str, dict]: The summary string, or a dictionary mapping section keys to
            {name: (entry1, entry2)} for both equal and unequal entries.
    """
    matches, _, _, unequal_entries = self.compare_schemas(sections=sections)
    # The summary is built before the merge below so the names are not listed twice.
    header_summary = self._get_tag_name_summary((matches, unequal_entries))

    # Fold the unequal entries into the matching ones, section by section.
    for section_key in matches:
        matches[section_key].update(unequal_entries[section_key])

    if not return_string:
        return matches
    return "Nodes with matching names:\n" + self._pretty_print_header(header_summary)

gather_schema_changes

gather_schema_changes(attribute_filter=None)

Generate a changelog by comparing the two schemas.

Source code in hed/schema/schema_comparer.py
def gather_schema_changes(self, attribute_filter=None):
    """Build a change log from a full comparison of the two schemas.

    Parameters:
        attribute_filter (str or None): Passed through to compare_schemas.

    Returns:
        dict: Maps section keys to lists of change dictionaries; only keys listed in
            SECTION_ENTRY_NAMES are returned, in that order.
    """
    _, added, removed, modified = self.compare_schemas(attribute_filter=attribute_filter, sections=None)

    collected = defaultdict(list)
    self._add_removed_items(collected, removed)
    self._add_added_items(collected, added)
    self._add_unequal_entries(collected, modified)
    self._add_extras_changes(collected)
    self._sort_changes_by_severity(collected)

    # Emit the sections in the canonical order, leaving out any that recorded nothing.
    ordered = {}
    for section_key in self.SECTION_ENTRY_NAMES:
        if section_key in collected:
            ordered[section_key] = collected[section_key]
    return ordered

pretty_print_change_dict

pretty_print_change_dict(
    change_dict, title="Schema changes", use_markdown=True
)

Format the change dictionary into a string.

Source code in hed/schema/schema_comparer.py
def pretty_print_change_dict(self, change_dict, title="Schema changes", use_markdown=True):
    """Render a change dictionary as text.

    Parameters:
        change_dict (dict): Maps section keys to lists of change dictionaries
            (each with the keys 'change', 'tag' and 'change_type').
        title (str): First line of the output.
        use_markdown (bool): If True, use bold section names and " - " bullets;
            otherwise plain section names and tab-indented lines.

    Returns:
        str: The formatted text, or an empty string when change_dict is empty.
    """
    if not change_dict:
        return "\n".join([])

    if use_markdown:
        bullet, emphasis = " - ", "**"
    else:
        bullet, emphasis = "\t", ""

    lines = [title, ""]
    for section_key, changes in change_dict.items():
        # Sections without a plural display name are shown under their raw key.
        heading = self.SECTION_ENTRY_NAMES_PLURAL.get(section_key, section_key)
        lines.append(f"{emphasis}{heading}:{emphasis}")
        for entry in changes:
            change, tag, change_type = entry['change'], entry['tag'], entry['change_type']
            lines.append(f"{bullet}{tag} ({change_type}): {change}")
        lines.append("")
    return "\n".join(lines)

HED Cache Functions

hed_cache

Infrastructure for caching HED schema from remote repositories.

cache_local_versions

cache_local_versions(cache_folder) -> int

Cache all schemas included with the HED installation.

Parameters:

Name Type Description Default
cache_folder str

The folder holding the cache.

required

Returns:

Type Description
int

int or None: Returns -1 on cache access failure. None otherwise

Source code in hed/schema/hed_cache.py
def cache_local_versions(cache_folder) -> Union[int, None]:
    """ Cache all schemas included with the HED installation.

    Parameters:
        cache_folder (str): The folder holding the cache. A falsy value selects HED_CACHE_DIRECTORY.

    Returns:
        int or None: Returns -1 on cache access failure.  None otherwise

    """
    if not cache_folder:
        cache_folder = HED_CACHE_DIRECTORY

    try:
        with CacheLock(cache_folder, write_time=False):
            _copy_installed_folder_to_cache(cache_folder)
    except CacheException:
        return -1
    # Success is signalled by None (not 0), as documented above.
    return None

cache_xml_versions

cache_xml_versions(
    hed_base_urls=DEFAULT_URL_LIST,
    hed_library_urls=DEFAULT_LIBRARY_URL_LIST,
    skip_folders=DEFAULT_SKIP_FOLDERS,
    cache_folder=None,
) -> float

Cache all schemas at the given URLs.

Parameters:

Name Type Description Default
hed_base_urls str or list

Path or list of paths. These should point to a single folder.

DEFAULT_URL_LIST
hed_library_urls str or list

Path or list of paths. These should point to folder containing library folders.

DEFAULT_LIBRARY_URL_LIST
skip_folders list

A list of subfolders to skip over when downloading.

DEFAULT_SKIP_FOLDERS
cache_folder str

The folder holding the cache.

None

Returns:

Name Type Description
float float

Returns -1 if cache failed for any reason, including having been cached too recently. Returns 0 if it successfully cached this time.

Notes
  • The Default skip_folders is 'deprecated'.
  • The HED cache folder defaults to HED_CACHE_DIRECTORY.
  • The directories on GitHub are of the form: https://api.github.com/repos/hed-standard/hed-schemas/contents/standard_schema
Source code in hed/schema/hed_cache.py
def cache_xml_versions(hed_base_urls=DEFAULT_URL_LIST, hed_library_urls=DEFAULT_LIBRARY_URL_LIST,
                       skip_folders=DEFAULT_SKIP_FOLDERS, cache_folder=None) -> float:
    """ Cache all schemas at the given URLs.

    Parameters:
        hed_base_urls (str or list): Path or list of paths.   These should point to a single folder.
        hed_library_urls (str or list): Path or list of paths.  These should point to folder containing library folders.
        skip_folders (list): A list of subfolders to skip over when downloading.
        cache_folder (str): The folder holding the cache.

    Returns:
        float: Returns -1 if cache failed for any reason, including having been cached too recently.
               Returns 0 if it successfully cached this time.

    Notes:
        - The Default skip_folders is 'deprecated'.
        - The HED cache folder defaults to HED_CACHE_DIRECTORY.
        - The directories on GitHub are of the form:
            https://api.github.com/repos/hed-standard/hed-schemas/contents/standard_schema

    """
    # Imported locally so the except clause below can always resolve the name.
    from urllib.error import URLError

    if not cache_folder:
        cache_folder = HED_CACHE_DIRECTORY

    try:
        with CacheLock(cache_folder):
            # Accept a single URL as well as a list of URLs.
            if isinstance(hed_base_urls, str):
                hed_base_urls = [hed_base_urls]
            if isinstance(hed_library_urls, str):
                hed_library_urls = [hed_library_urls]
            all_hed_versions = {}
            for hed_base_url in hed_base_urls:
                new_hed_versions = _get_hed_xml_versions_one_library(hed_base_url)
                _merge_in_versions(all_hed_versions, new_hed_versions)
            for hed_library_url in hed_library_urls:
                new_hed_versions = _get_hed_xml_versions_from_url_all_libraries(hed_library_url,
                                                                                skip_folders=skip_folders)
                _merge_in_versions(all_hed_versions, new_hed_versions)

            for library_name, hed_versions in all_hed_versions.items():
                for version, version_info in hed_versions.items():
                    _cache_hed_version(version, library_name, version_info, cache_folder=cache_folder)

    # A tuple is required here: "except A or B or C" evaluates to "except A" only,
    # which let ValueError and URLError escape instead of returning -1.
    except (CacheException, ValueError, URLError):
        return -1

    return 0

get_cache_directory

get_cache_directory(cache_folder=None) -> str

Return the current value of HED_CACHE_DIRECTORY.

Parameters:

Name Type Description Default
cache_folder str

Optional cache folder override.

None

Returns:

Name Type Description
str str

The cache directory path.

Source code in hed/schema/hed_cache.py
def get_cache_directory(cache_folder=None) -> str:
    """ Return the cache directory to use.

    Parameters:
        cache_folder (str): Optional cache folder override.

    Returns:
        str: cache_folder if it is truthy, otherwise the current value of HED_CACHE_DIRECTORY.
    """
    return cache_folder or HED_CACHE_DIRECTORY

get_hed_version_path

get_hed_version_path(
    xml_version,
    library_name=None,
    local_hed_directory=None,
    check_prerelease=False,
)

Get HED XML file path in a directory. Only returns filenames that exist.

Parameters:

Name Type Description Default
library_name str or None

Optional schema library name.

None
xml_version str

Returns this version if it exists

required
local_hed_directory str

Path to local HED directory. Defaults to HED_CACHE_DIRECTORY

None
check_prerelease bool

Also check for prerelease schemas

False

Returns: str or None: The path to the requested HED version in the HED directory, or None if that version is not found.

Source code in hed/schema/hed_cache.py
def get_hed_version_path(xml_version, library_name=None, local_hed_directory=None, check_prerelease=False):
    """ Get the path of a specific HED XML version in a directory.  Only returns filenames that exist.

    Parameters:
        xml_version (str): The version to look up.
        library_name (str or None): Optional schema library name.
        local_hed_directory (str): Path to local HED directory.  Defaults to HED_CACHE_DIRECTORY.
        check_prerelease (bool): Also check for prerelease schemas.

    Returns:
        str or None: The path for the requested version, or None if no versions are available,
                     no version was requested, or the requested version is not among those found.

    """
    directory = local_hed_directory or HED_CACHE_DIRECTORY

    # The version lookup is always performed, even when xml_version is empty.
    available = get_hed_versions(directory, library_name, check_prerelease)
    if available and xml_version and xml_version in available:
        return _create_xml_filename(xml_version, library_name, directory, check_prerelease)
    return None

get_hed_versions

get_hed_versions(
    local_hed_directory=None,
    library_name=None,
    check_prerelease=False,
) -> Union[list, dict]

Get the HED versions in the HED directory.

Parameters:

Name Type Description Default
local_hed_directory str

Directory to check for versions which defaults to hed_cache.

None
library_name str or None

An optional schema library name. None retrieves the standard schema only. Pass "all" to retrieve all standard and library schemas as a dict.

None
check_prerelease bool

If True, results can include prerelease schemas

False

Returns:

Type Description
Union[list, dict]

Union[list, dict]: List of version numbers or dictionary {library_name: [versions]}.

Source code in hed/schema/hed_cache.py
def get_hed_versions(local_hed_directory=None, library_name=None, check_prerelease=False) -> Union[list, dict]:
    """ Get the HED versions in the HED directory.

    Parameters:
        local_hed_directory (str): Directory to check for versions which defaults to hed_cache.
        library_name (str or None): An optional schema library name.
                                    None retrieves the standard schema only.
                                    Pass "all" to retrieve all standard and library schemas as a dict.
        check_prerelease (bool): If True, results can include prerelease schemas

    Returns:
        Union[list, dict]: The sorted list of versions for the requested library when any were found;
                           otherwise the dictionary {library_name: [versions]}
                           (which is empty when nothing matched).

    """
    base_dir = local_hed_directory or HED_CACHE_DIRECTORY
    # Any falsy library name (e.g. "") is treated the same as None.
    wanted_library = library_name or None

    search_dirs = [base_dir]
    if check_prerelease and not base_dir.endswith(prerelease_suffix):
        search_dirs.append(os.path.join(base_dir, "prerelease"))

    def _list_search_dirs():
        # Collect the file names of every search directory, skipping directories that do not exist.
        names = []
        for folder in search_dirs:
            try:
                names.extend(os.listdir(folder))
            except FileNotFoundError:
                continue
        return names

    file_names = _list_search_dirs()
    if not file_names:
        # Nothing was found on the first pass: call cache_local_versions, then look once more.
        cache_local_versions(base_dir)
        file_names = _list_search_dirs()

    versions_by_library = {}
    for file_name in file_names:
        match = version_pattern.match(file_name)
        if match is None:
            continue
        # NOTE(review): group(2) is presumably None for standard-schema files - confirm against version_pattern.
        found_library = match.group(2)
        if wanted_library != "all" and found_library != wanted_library:
            continue
        versions_by_library.setdefault(found_library, []).append(match.group(3))

    versions_by_library = {name: _sort_version_list(found) for name, found in versions_by_library.items()}

    if wanted_library in versions_by_library:
        return versions_by_library[wanted_library]
    return versions_by_library

get_library_data cached

get_library_data(library_name, cache_folder=None) -> dict

Retrieve the library data for the given library.

Currently, this is just the valid ID range.

Parameters:

Name Type Description Default
library_name str

The schema name. "" for standard schema.

required
cache_folder str

The cache folder to use if not using the default.

None

Returns:

Name Type Description
dict dict

The data for a specific library.

Source code in hed/schema/hed_cache.py
@functools.lru_cache(maxsize=50)
def get_library_data(library_name, cache_folder=None) -> dict:
    """Retrieve the library data for the given library.

       Currently, this is just the valid ID range.

       The lookup is attempted up to three times: against the file already in the cache,
       then after copying the installed "library_data" folder into the cache,
       then after fetching LIBRARY_DATA_URL into the cache.

       Parameters:
           library_name (str): The schema name.  "" for standard schema.
           cache_folder (str): The cache folder to use if not using the default.

       Returns:
           dict: The data for a specific library, or an empty dict if every attempt failed.
    """
    if cache_folder is None:
        cache_folder = HED_CACHE_DIRECTORY

    data_folder = os.path.join(cache_folder, "library_data")
    data_file = os.path.join(data_folder, "library_data.json")

    def _read_entry():
        # Load the cached JSON file and pull out the entry for the requested library.
        with open(data_file) as file:
            return json.load(file)[library_name]

    # Attempt 1: whatever is already in the cache.
    try:
        return _read_entry()
    except (OSError, CacheException, ValueError, KeyError):
        pass

    # Attempt 2: refresh the cache folder from the installed copy, then retry.
    try:
        with CacheLock(data_folder, write_time=False):
            _copy_installed_folder_to_cache(data_folder, "library_data")
        return _read_entry()
    except (OSError, CacheException, ValueError, KeyError):
        pass

    # Attempt 3: fetch the file from LIBRARY_DATA_URL, then retry.
    try:
        with CacheLock(data_folder):
            # if this fails it'll fail to load in the next step
            _cache_specific_url(LIBRARY_DATA_URL, data_file)
        return _read_entry()
    except (OSError, CacheException, ValueError, URLError, KeyError):
        pass

    # This failed to get any data for some reason
    return {}

set_cache_directory

set_cache_directory(new_cache_dir)

Set default global HED cache directory.

Parameters:

Name Type Description Default
new_cache_dir str

Directory to use as the new default HED cache. It is created if it does not already exist.

required
Source code in hed/schema/hed_cache.py
def set_cache_directory(new_cache_dir):
    """ Set default global HED cache directory.

    Does nothing when new_cache_dir is falsy.

    Parameters:
        new_cache_dir (str): Directory to use as the new default cache.  Created if it does not exist.

    """
    global HED_CACHE_DIRECTORY

    if not new_cache_dir:
        return

    # The global is updated before the directory is created.
    HED_CACHE_DIRECTORY = new_cache_dir
    os.makedirs(new_cache_dir, exist_ok=True)

Schema Validation Utilities

schema_validation_util

Utilities used in HED validation/loading using a HED schema.

get_allowed_characters

get_allowed_characters(value_classes) -> set[str]

Returns the allowed characters in a given container of value classes

Parameters:

Name Type Description Default
value_classes list of HedSchemaEntry

A list of schema entries that should have the allowedCharacter attribute

required

Returns:

Type Description
set[str]

set[str]: The set of all characters from the given classes

Source code in hed/schema/schema_validation_util.py
def get_allowed_characters(value_classes) -> set[str]:
    """Returns the allowed characters in a given container of value classes

    Parameters:
        value_classes (list of HedSchemaEntry): A list of schema entries that should have the allowedCharacter attribute

    Returns:
        set[str]: The set of all characters from the given classes, always including "#" and "/"
    """
    # This could be pre-computed
    # Each entry's attribute is a comma-separated list of character set names; a missing attribute counts as "".
    set_names = [
        set_name
        for entry in value_classes
        for set_name in entry.attributes.get(constants.HedKey.AllowedCharacter, "").split(",")
    ]

    allowed = get_allowed_characters_by_name(set_names)
    # for now, just always allow these special cases(it's validated extensively elsewhere)
    allowed.update("#/")
    return allowed

get_allowed_characters_by_name

get_allowed_characters_by_name(
    character_set_names,
) -> set[str]

Returns the allowed characters from a list of character set names

Note: "nonascii" is a special case "character" that can be included as well

Parameters:

Name Type Description Default
character_set_names list of str

A list of character sets to allow. See hed_schema_constants.character_types

required

Returns:

Type Description
set[str]

set[str]: The set of all characters from the names

Source code in hed/schema/schema_validation_util.py
def get_allowed_characters_by_name(character_set_names) -> set[str]:
    """Returns the allowed characters from a list of character set names

    Note: "nonascii" is a special case "character" that can be included as well

    Parameters:
        character_set_names (list of str): A list of character sets to allow.  See hed_schema_constants.character_types

    Returns:
        set[str]: The set of all characters from the names
    """
    allowed = set()
    for set_name in character_set_names:
        if set_name == "nonascii" or set_name not in character_types:
            # "nonascii" and any name that is not a known character set are kept verbatim as a single entry.
            allowed.add(set_name)
        else:
            allowed.update(character_types[set_name])
    return allowed

get_problem_indexes

get_problem_indexes(
    validation_string, character_set, index_adj=0
) -> list[tuple[str, int]]

Finds indexes with values not in character set

Parameters:

Name Type Description Default
validation_string str

The string to check characters in.

required
character_set set

The list of valid characters (or the value "nonascii" as a set entry).

required
index_adj int

The value to adjust the reported indices by, if this isn't the start of a string.

0

Returns:

Type Description
list[tuple[str, int]]

list[tuple[str, int]]: The list of problematic characters and their indices.

Source code in hed/schema/schema_validation_util.py
def get_problem_indexes(validation_string, character_set, index_adj=0) -> list[tuple[str, int]]:
    """Finds indexes with values not in character set

    Parameters:
        validation_string (str): The string to check characters in.
        character_set (set): The list of valid characters (or the value "nonascii" as a set entry).
        index_adj (int): The value to adjust the reported indices by, if this isn't the start of a string.

    Returns:
        list[tuple[str, int]]: The list of problematic characters and their indices.
                               Always empty when character_set is empty.
    """
    if not character_set:
        return []

    nonascii_allowed = "nonascii" in character_set
    problems = []
    for position, char in enumerate(validation_string):
        if char in character_set:
            continue
        # With the "nonascii" entry present, any character above code point 127 is accepted.
        if nonascii_allowed and ord(char) > 127:
            continue
        problems.append((char, position + index_adj))

    return problems

schema_version_for_library

schema_version_for_library(
    hed_schema, library_name
) -> Union[str, None]

Given the library name and HED schema object, return the version

Parameters:

Name Type Description Default
hed_schema HedSchema

the schema object

required
library_name str or None

The library name you're interested in. "" for the standard schema.

required

Returns:

Type Description
Union[str, None]

Union[str, None]: The version number of the given library name. Returns None if unknown library_name.

Source code in hed/schema/schema_validation_util.py
def schema_version_for_library(hed_schema, library_name) -> Union[str, None]:
    """ Given the library name and HED schema object, return the version

    Parameters:
        hed_schema (HedSchema): the schema object
        library_name (str or None): The library name you're interested in.  "" for the standard schema.

    Returns:
        Union[str, None]: The version number of the given library name.  Returns None if unknown library_name.
    """
    wanted = "" if library_name is None else library_name

    # The schema's library and version_number are parallel comma-separated lists; the first matching name wins.
    name_version_pairs = zip(hed_schema.library.split(","), hed_schema.version_number.split(","))
    matched = next((version for name, version in name_version_pairs if name == wanted), None)
    if matched is not None:
        return matched

    # Return the partnered schema version
    if wanted == "" and hed_schema.with_standard:
        return hed_schema.with_standard
    return None

validate_schema_description_new

validate_schema_description_new(hed_entry) -> list[dict]

Check the description of the entry for invalid character issues

Parameters:

Name Type Description Default
hed_entry HedSchemaEntry

A single schema entry

required

Returns:

Type Description
list[dict]

list[dict]: A list of issues pertaining to all invalid characters found in the description. Each issue is a dictionary.

Source code in hed/schema/schema_validation_util.py
def validate_schema_description_new(hed_entry) -> list[dict]:
    """ Check the description of the entry for invalid character issues

    Parameters:
        hed_entry (HedSchemaEntry): A single schema entry

    Returns:
        list[dict]: A list of issues pertaining to all invalid characters found in description.
                    Each issue is a dictionary.  Empty when the entry has no description.
    """
    description = hed_entry.description
    if not description:
        return []

    allowed = get_allowed_characters_by_name(["text", "comma"])
    problems = get_problem_indexes(description, allowed)

    # Kludge, just get short name here if we have it for error reporting
    report_name = hed_entry.name
    if hasattr(hed_entry, "short_tag_name"):
        report_name = hed_entry.short_tag_name

    issues = []
    for bad_char, position in problems:
        issues.extend(ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC,
                                                hed_entry.description, report_name,
                                                problem_char=bad_char, char_index=position))
    return issues

validate_schema_tag_new

validate_schema_tag_new(hed_entry) -> list[dict]

Check tag entry for capitalization and illegal characters.

Parameters:

Name Type Description Default
hed_entry HedTagEntry

A single tag entry

required

Returns:

Type Description
list[dict]

list[dict]: A list of all formatting issues found in the term. Each issue is a dictionary.

Source code in hed/schema/schema_validation_util.py
def validate_schema_tag_new(hed_entry) -> list[dict]:
    """ Check tag entry for capitalization and illegal characters.

    Parameters:
        hed_entry (HedTagEntry): A single tag entry

    Returns:
        list[dict]: A list of all formatting issues found in the term. Each issue is a dictionary.
    """
    tag_name = hed_entry.short_tag_name
    # Any # terms will have already been validated as the previous entry.
    if tag_name == "#":
        return []

    issues = []
    # The first character must be a digit or an uppercase letter; an empty name skips this check.
    first_char = tag_name[0] if tag_name else ""
    if first_char and not first_char.isdigit() and not first_char.isupper():
        issues.extend(ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CAPITALIZATION,
                                                tag_name, char_index=0, problem_char=first_char))
    issues.extend(validate_schema_term_new(hed_entry, tag_name))
    return issues

validate_schema_term_new

validate_schema_term_new(
    hed_entry, hed_term=None
) -> list[dict]

Check the term for invalid character issues

Parameters:

Name Type Description Default
hed_entry HedSchemaEntry

A single schema entry

required
hed_term str or None

Use instead of hed_entry.name if present.

None

Returns:

Type Description
list[dict]

list[dict]: A list of all formatting issues found in the term. Each issue is a dictionary.

Source code in hed/schema/schema_validation_util.py
def validate_schema_term_new(hed_entry, hed_term=None) -> list[dict]:
    """ Check the term for invalid character issues

    Parameters:
        hed_entry (HedSchemaEntry): A single schema entry
        hed_term (str or None): Use instead of hed_entry.name if present.

    Returns:
        list[dict]: A list of all formatting issues found in the term. Each issue is a dictionary.
    """
    term = hed_term if hed_term else hed_entry.name

    # todo: potentially optimize this someday, as most values are the same
    # The "name" character set is always allowed, plus any sets listed in the entry's allowedCharacter attribute.
    extra_set_names = hed_entry.attributes.get("allowedCharacter", "").split(",")
    allowed = get_allowed_characters_by_name(["name"] + extra_set_names)

    issues = []
    for bad_char, position in get_problem_indexes(term, allowed):
        issues.extend(ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG,
                                                term, char_index=position, problem_char=bad_char))
    return issues