"""Abstract base class for loading HED schema files into HedSchema objects."""
import copy
from hed.schema.schema_io import schema_util
from hed.errors.exceptions import HedFileError, HedExceptions
from hed.schema.hed_schema import HedSchema
from hed.schema import hed_schema_constants as constants
from hed.schema.hed_schema_constants import HedKey
from abc import abstractmethod, ABC
from hed.schema import schema_header_util
from hed.schema import hed_schema_constants
from hed.schema.schema_io import df_constants
[docs]
class SchemaLoader(ABC):
"""Baseclass for schema loading, to handle basic errors and partnered schemas
Expected usage is SchemaLoaderXML.load(filename)
SchemaLoaderXML(filename) will load just the header_attributes
"""
def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""):
    """Open the schema source and read its header attributes.

    Only the header is processed here; the body of the schema is parsed later by ``_load``.

    Parameters:
        filename(str or None): A valid filepath or None
        schema_as_string(str or None): A full schema as text or None
        schema(HedSchema or None): A HED schema to merge this new file into.
            It must be a with-standard schema with the same withStandard value as this file.
        file_format(str or None): The format of this file if needed (only for owl currently)
        name(str or None): Optional user supplied identifier, by default uses filename

    :raises HedFileError:
        - Both filename and schema_as_string were given.
        - Opening the input raised OSError, TypeError or ValueError.
        - A schema to merge into was given but it has no withStandard value.
        - A schema to merge into was given but its withStandard value differs from this file's.
    """
    if schema_as_string and filename:
        raise HedFileError(HedExceptions.BAD_PARAMETERS, "Invalid parameters to schema creation.", filename)

    self.file_format = file_format
    self.filename = filename
    self.name = name if name else filename
    self.schema_as_string = schema_as_string
    self.appending_to_schema = False
    try:
        self.input_data = self._open_file()
    except OSError as e:
        # strerror is None when the OSError was created without an errno
        # (e.g. OSError("message")), so fall back to the exception text.
        raise HedFileError(HedExceptions.FILE_NOT_FOUND, e.strerror or str(e), self.name) from e
    except (TypeError, ValueError) as e:
        raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(e), self.name) from e

    hed_attributes = self._get_header_attributes(self.input_data)
    schema_header_util.validate_attributes(hed_attributes, name=self.name)
    with_standard = hed_attributes.get(hed_schema_constants.WITH_STANDARD_ATTRIBUTE, "")
    self.library = hed_attributes.get(hed_schema_constants.LIBRARY_ATTRIBUTE, "")
    version_number = hed_attributes.get(hed_schema_constants.VERSION_ATTRIBUTE, "")

    if not schema:
        self._schema = HedSchema()
    else:
        # Merging into an existing schema: both sides must share the same withStandard value.
        self._schema = schema
        self.appending_to_schema = True
        if not self._schema.with_standard:
            raise HedFileError(
                HedExceptions.SCHEMA_LOAD_FAILED,
                "Loading multiple normal schemas as a merged one with the same namespace. "
                "Ensure schemas have the withStandard header attribute set",
                self.name,
            )
        elif with_standard != self._schema.with_standard:
            raise HedFileError(
                HedExceptions.SCHEMA_LOAD_FAILED,
                f"Merging schemas requires same withStandard value ({with_standard} != {self._schema.with_standard}).",
                self.name,
            )
        # The merged header lists both versions and both libraries, comma separated.
        hed_attributes[hed_schema_constants.VERSION_ATTRIBUTE] = self._schema.version_number + f",{version_number}"
        hed_attributes[hed_schema_constants.LIBRARY_ATTRIBUTE] = self._schema.library + f",{self.library}"

    if name:
        self._schema.name = name
    self._schema.filename = filename
    self._schema.header_attributes = hed_attributes
    self._loading_merged = False
    self.fatal_errors = []
@property
def schema(self):
    """The schema object held by this loader.

    Useful when only the header attributes are wanted: the schema is only
    partially loaded until a full load has been run.
    """
    return self._schema
[docs]
@classmethod
def load(cls, filename=None, schema_as_string=None, schema=None, file_format=None, name=""):
    """Create a loader for the given input and return the fully loaded schema.

    A partnered (withStandard) schema is loaded together with its standard schema if applicable.

    Parameters:
        filename(str or None): A valid filepath or None
        schema_as_string(str or None): A full schema as text or None
        schema(HedSchema or None): A HED schema to merge this new file into
            It must be a with-standard schema with the same value.
        file_format(str or None): If this is an owl file being loaded, this is the format.
            Allowed values include: turtle, json-ld, and owl(xml)
        name(str or None): Optional user supplied identifier, by default uses filename

    Returns:
        HedSchema: The new schema
    """
    # Arguments are forwarded positionally, in the order the constructor declares them.
    instance = cls(filename, schema_as_string, schema, file_format, name)
    loaded_schema = instance._load()
    return loaded_schema
def _load(self):
    """Parse the data opened by the constructor into the schema and finalize it.

    When this is a partnered (withStandard) schema that is neither being appended to an
    existing schema nor already merged, the standard schema is loaded first and a deep
    copy of it becomes the schema the new data is parsed into.

    Returns:
        schema(HedSchema): The new schema

    :raises HedFileError:
        - The withStandard schema could not be loaded.
    """
    self._loading_merged = True
    partial = self._schema
    if not self.appending_to_schema and partial.with_standard and not partial.merged:
        # Imported here rather than at module level, as in the original code.
        from hed.schema.hed_schema_io import load_schema_version

        header_backup = partial.header_attributes
        format_backup = partial.source_format
        try:
            standard_schema = load_schema_version(partial.with_standard)
        except HedFileError as load_error:
            raise HedFileError(
                HedExceptions.SCHEMA_LIBRARY_INVALID,
                message=f"Cannot load withStandard schema '{partial.with_standard}'",
                filename=load_error.filename,
            ) from load_error

        # Work on a copy so the cached standard schema is never altered.
        self._schema = merged = copy.deepcopy(standard_schema)
        merged.filename = self.filename
        # The name is set by hand because it is not passed to load_schema_version.
        merged.name = self.name
        merged.header_attributes = header_backup
        merged.source_format = format_backup
        self._loading_merged = False

    self._parse_data()
    self._schema.finalize_dictionaries()
    self.fix_extras()
    return self._schema
@abstractmethod
def _open_file(self):
    """Retrieve the input from filename/schema_as_string.

    Subclasses must override this. The constructor stores the returned value as
    ``self.input_data`` and converts OSError, TypeError and ValueError raised here
    into HedFileError.
    """
@abstractmethod
def _get_header_attributes(self, input_data):
    """Return the header attributes found in the input data.

    Subclasses must override this. The constructor calls ``.get(...)`` on the result
    and assigns items to it, so it is expected to be dict-like.
    """
@abstractmethod
def _parse_data(self):
    """Put the input data into the new schema.

    Subclasses must override this. It is called by ``_load`` before the schema's
    dictionaries are finalized.
    """
def _add_to_dict_base(self, entry, key_class):
    """Add an entry to the schema, tagging it with this loader's library when needed.

    Parameters:
        entry: The schema entry to add; must provide has_attribute, _set_attribute_value and name.
        key_class: Passed through to the schema's _add_tag_to_dict.

    Returns:
        The value returned by the schema's _add_tag_to_dict, or None if the entry was skipped
        (it has no InLibrary attribute while appending to an already merged schema).
    """
    target = self._schema
    if not entry.has_attribute(HedKey.InLibrary) and self.appending_to_schema and target.merged:
        return None

    # Equivalent to: not with_standard or (not merged and with_standard).
    tag_with_library = self.library and not (target.with_standard and target.merged)
    # only add it if not already present - This is a rare case
    if tag_with_library and not entry.has_attribute(HedKey.InLibrary):
        entry._set_attribute_value(HedKey.InLibrary, self.library)

    return target._add_tag_to_dict(entry.name, entry, key_class)
[docs]
@staticmethod
def find_rooted_entry(tag_entry, schema, loading_merged):
    """Semi-validate a possibly rooted tag and look up the tag it is rooted to.

    Parameters:
        tag_entry(HedTagEntry): the possibly rooted tag
        schema(HedSchema): The schema being loaded
        loading_merged(bool): If this schema was already merged before loading

    Returns:
        Union[HedTagEntry, None]: The base tag entry from the standard schema
            Returns None if this tag isn't rooted, or if loading_merged is set.

    :raises HedFileError:
        - A rooted attribute is found in a non-paired schema
        - A rooted attribute is not a string
        - A rooted attribute was found on a non-root node in an unmerged schema.
        - A rooted attribute is found on a root node in a merged schema.
        - A rooted attribute indicates a tag that doesn't exist in the base schema.
    """
    rooted_name = tag_entry.has_attribute(constants.HedKey.Rooted, return_value=True)
    if rooted_name is None:
        return None

    # The checks run in a fixed order; the first failing one supplies the error message.
    problem = None
    rooted_entry = None
    if not schema.with_standard:
        problem = f"Rooted tag attribute found on '{tag_entry.short_tag_name}' in a standard schema."
    elif not isinstance(rooted_name, str):
        problem = f"Rooted tag '{tag_entry.short_tag_name}' is not a string."
    elif tag_entry.parent_name and not loading_merged:
        problem = f"Found rooted tag '{tag_entry.short_tag_name}' as a non root node."
    elif not tag_entry.parent_name and loading_merged:
        problem = f"Found rooted tag '{tag_entry.short_tag_name}' as a root node in a merged schema."
    else:
        rooted_entry = schema.tags.get(rooted_name)
        # The target must exist and must not itself carry the InLibrary attribute.
        if not rooted_entry or rooted_entry.has_attribute(constants.HedKey.InLibrary):
            problem = f"Rooted tag '{tag_entry.short_tag_name}' not found in paired standard schema"

    if problem is not None:
        raise HedFileError(HedExceptions.SCHEMA_LIBRARY_INVALID, problem, schema.name)

    return None if loading_merged else rooted_entry
def _add_fatal_error(
    self,
    line_number,
    line,
    warning_message="Schema term is empty or the line is malformed",
    error_code=HedExceptions.WIKI_DELIMITERS_INVALID,
):
    """Format an error for the given line and append it to self.fatal_errors.

    Parameters:
        line_number: Line number, passed through to schema_util.format_error.
        line: The offending line, passed through to schema_util.format_error.
        warning_message(str): Description of the problem.
        error_code: The error code to report; defaults to HedExceptions.WIKI_DELIMITERS_INVALID.
    """
    formatted = schema_util.format_error(line_number, line, warning_message, error_code)
    self.fatal_errors += formatted